Update prebuilt Clang to r416183b from Android.
https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/+/06a71ddac05c22edb2d10b590e1769b3f8619bef
clang 12.0.5 (based on r416183b) from build 7284624.
Change-Id: I277a316abcf47307562d8b748b84870f31a72866
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/linux-x64/clang/include/llvm/CodeGen/AccelTable.h b/linux-x64/clang/include/llvm/CodeGen/AccelTable.h
index 734531a..f8f6b54 100644
--- a/linux-x64/clang/include/llvm/CodeGen/AccelTable.h
+++ b/linux-x64/clang/include/llvm/CodeGen/AccelTable.h
@@ -101,8 +101,6 @@
///
/// An Apple Accelerator Table can be serialized by calling the emitAppleAccelTable
/// function.
-///
-/// TODO: Add DWARF v5 emission code.
namespace llvm {
diff --git a/linux-x64/clang/include/llvm/CodeGen/Analysis.h b/linux-x64/clang/include/llvm/CodeGen/Analysis.h
index 0be0ac2..bdfb416 100644
--- a/linux-x64/clang/include/llvm/CodeGen/Analysis.h
+++ b/linux-x64/clang/include/llvm/CodeGen/Analysis.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CodeGen.h"
@@ -31,9 +30,6 @@
class TargetLoweringBase;
class TargetLowering;
class TargetMachine;
-class SDNode;
-class SDValue;
-class SelectionDAG;
struct EVT;
/// Compute the linearized index of a member in a nested
@@ -96,11 +92,6 @@
/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *ExtractTypeInfo(Value *V);
-/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
-/// processed uses a memory 'm' constraint.
-bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
- const TargetLowering &TLI);
-
/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
@@ -122,7 +113,7 @@
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM);
+bool isInTailCallPosition(const CallBase &Call, const TargetMachine &TM);
/// Test if given that the input instruction is in the tail call position, if
/// there is an attribute mismatch between the caller and the callee that will
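
The isInTailCallPosition() change above reflects the removal of CallSite.h and ImmutableCallSite in favor of CallBase throughout this toolchain drop. A minimal migration sketch, assuming a hypothetical helper mayTailCall() that is not part of this patch:

#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Hypothetical caller: CallBase subsumes what ImmutableCallSite used to wrap
// (CallInst and InvokeInst), so a dyn_cast replaces the CallSite constructor.
static bool mayTailCall(const Instruction &I, const TargetMachine &TM) {
  if (const auto *CB = dyn_cast<CallBase>(&I))
    return isInTailCallPosition(*CB, TM);
  return false;
}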
diff --git a/linux-x64/clang/include/llvm/CodeGen/AntiDepBreaker.h b/linux-x64/clang/include/llvm/CodeGen/AntiDepBreaker.h
new file mode 100644
index 0000000..0553d7d
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,96 @@
+//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class RegisterClassInfo;
+
+/// This class works in conjunction with the post-RA scheduler to rename
+/// registers to break register anti-dependencies (WAR hazards).
+class AntiDepBreaker {
+public:
+ using DbgValueVector =
+ std::vector<std::pair<MachineInstr *, MachineInstr *>>;
+
+ virtual ~AntiDepBreaker();
+
+ /// Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) = 0;
+
+ /// Identify anti-dependencies within a basic-block region and break them by
+ /// renaming registers. Return the number of anti-dependencies broken.
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) = 0;
+
+ /// Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ virtual void Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) = 0;
+
+ /// Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() = 0;
+
+ /// Update the DBG_VALUE if the dependency breaker is updating
+ /// another machine instruction to use NewReg.
+ void UpdateDbgValue(MachineInstr &MI, unsigned OldReg, unsigned NewReg) {
+ assert(MI.isDebugValue() && "MI is not DBG_VALUE!");
+ if (MI.getDebugOperand(0).isReg() &&
+ MI.getDebugOperand(0).getReg() == OldReg)
+ MI.getDebugOperand(0).setReg(NewReg);
+ }
+
+ /// Update all DBG_VALUE instructions that may be affected by the dependency
+ /// breaker's update of ParentMI to use NewReg.
+ void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI,
+ unsigned OldReg, unsigned NewReg) {
+ // The following code is dependent on the order in which the DbgValues are
+ // constructed in ScheduleDAGInstrs::buildSchedGraph.
+ MachineInstr *PrevDbgMI = nullptr;
+ for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) {
+ MachineInstr *PrevMI = DV.second;
+ if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) {
+ MachineInstr *DbgMI = DV.first;
+ UpdateDbgValue(*DbgMI, OldReg, NewReg);
+ PrevDbgMI = DbgMI;
+ } else if (PrevDbgMI) {
+ break; // If no match and already found a DBG_VALUE, we're done.
+ }
+ }
+ }
+};
+
+AntiDepBreaker *createAggressiveAntiDepBreaker(
+ MachineFunction &MFi, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs);
+
+AntiDepBreaker *createCriticalAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
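
AntiDepBreaker.h is new in this prebuilt, so a conforming subclass is sketched below; the name NoopAntiDepBreaker and its do-nothing behavior are illustrative assumptions, not part of the patch:

#include "llvm/CodeGen/AntiDepBreaker.h"
#include "llvm/CodeGen/ScheduleDAG.h" // for SUnit

using namespace llvm;

namespace {
// Hypothetical no-op breaker: observes the scheduling region but never
// renames a register, so it reports zero anti-dependencies broken.
class NoopAntiDepBreaker : public AntiDepBreaker {
public:
  void StartBlock(MachineBasicBlock *BB) override {}
  unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
                                 MachineBasicBlock::iterator Begin,
                                 MachineBasicBlock::iterator End,
                                 unsigned InsertPosIndex,
                                 DbgValueVector &DbgValues) override {
    return 0; // Nothing renamed, nothing broken.
  }
  void Observe(MachineInstr &MI, unsigned Count,
               unsigned InsertPosIndex) override {}
  void FinishBlock() override {}
};
} // end anonymous namespace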
diff --git a/linux-x64/clang/include/llvm/CodeGen/AsmPrinter.h b/linux-x64/clang/include/llvm/CodeGen/AsmPrinter.h
index 9c2097b..76486b0 100644
--- a/linux-x64/clang/include/llvm/CodeGen/AsmPrinter.h
+++ b/linux-x64/clang/include/llvm/CodeGen/AsmPrinter.h
@@ -17,8 +17,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -58,7 +56,6 @@
class MachineOptimizationRemarkEmitter;
class MCAsmInfo;
class MCCFIInstruction;
-struct MCCodePaddingContext;
class MCContext;
class MCExpr;
class MCInst;
@@ -69,10 +66,17 @@
class MCTargetOptions;
class MDNode;
class Module;
+class PseudoProbeHandler;
class raw_ostream;
class StackMaps;
+class StringRef;
class TargetLoweringObjectFile;
class TargetMachine;
+class Twine;
+
+namespace remarks {
+class RemarkStreamer;
+}
/// This class is intended to be used as a driving class for all asm writers.
class AsmPrinter : public MachineFunctionPass {
@@ -98,7 +102,7 @@
/// This is a pointer to the current MachineModuleInfo.
MachineModuleInfo *MMI = nullptr;
- /// This is a pointer to the current MachineLoopInfo.
+ /// This is a pointer to the current MachineDominatorTree.
MachineDominatorTree *MDT = nullptr;
/// This is a pointer to the current MachineLoopInfo.
@@ -107,36 +111,37 @@
/// Optimization remark emitter.
MachineOptimizationRemarkEmitter *ORE;
+ /// The symbol for the entry in __patchable_function_entries.
+ MCSymbol *CurrentPatchableFunctionEntrySym = nullptr;
+
/// The symbol for the current function. This is recalculated at the beginning
/// of each call to runOnMachineFunction().
MCSymbol *CurrentFnSym = nullptr;
+ /// The symbol for the current function descriptor on AIX. This is created
+ /// at the beginning of each call to SetupMachineFunction().
+ MCSymbol *CurrentFnDescSym = nullptr;
+
/// The symbol used to represent the start of the current function for the
/// purpose of calculating its size (e.g. using the .size directive). By
/// default, this is equal to CurrentFnSym.
MCSymbol *CurrentFnSymForSize = nullptr;
+ /// Map a basic block section ID to the begin and end symbols of that section
+ /// which determine the section's range.
+ struct MBBSectionRange {
+ MCSymbol *BeginLabel, *EndLabel;
+ };
+
+ MapVector<unsigned, MBBSectionRange> MBBSectionRanges;
+
/// Map global GOT equivalent MCSymbols to GlobalVariables and keep track of
/// its number of uses by other globals.
using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>;
MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
-private:
- MCSymbol *CurrentFnBegin = nullptr;
- MCSymbol *CurrentFnEnd = nullptr;
- MCSymbol *CurExceptionSym = nullptr;
-
- // The garbage collection metadata printer table.
- void *GCMetadataPrinters = nullptr; // Really a DenseMap.
-
- /// Emit comments in assembly output if this is true.
- bool VerboseAsm;
-
- static char ID;
-
-protected:
- /// Protected struct HandlerInfo and Handlers permit target extended
- /// AsmPrinter adds their own handlers.
+ /// struct HandlerInfo and Handlers permit users or target-extended
+ /// AsmPrinters to add their own handlers.
struct HandlerInfo {
std::unique_ptr<AsmPrinterHandler> Handler;
const char *TimerName;
@@ -152,9 +157,33 @@
TimerGroupDescription(TimerGroupDescription) {}
};
+private:
+ MCSymbol *CurrentFnEnd = nullptr;
+
+ /// Map a basic block section ID to the exception symbol associated with that
+ /// section. Map entries are assigned and looked up via
+ /// AsmPrinter::getMBBExceptionSym.
+ DenseMap<unsigned, MCSymbol *> MBBSectionExceptionSyms;
+
+ // The symbol used to represent the start of the current BB section of the
+ // function. This is used to calculate the size of the BB section.
+ MCSymbol *CurrentSectionBeginSym = nullptr;
+
+ // The garbage collection metadata printer table.
+ void *GCMetadataPrinters = nullptr; // Really a DenseMap.
+
+ /// Emit comments in assembly output if this is true.
+ bool VerboseAsm;
+
+ static char ID;
+
+protected:
+ MCSymbol *CurrentFnBegin = nullptr;
+
/// A vector of all debug/EH info emitters we should use. This vector
/// maintains ownership of the emitters.
- SmallVector<HandlerInfo, 1> Handlers;
+ std::vector<HandlerInfo> Handlers;
+ size_t NumUserHandlers = 0;
public:
struct SrcMgrDiagInfo {
@@ -178,6 +207,10 @@
/// If the target supports dwarf debug info, this pointer is non-null.
DwarfDebug *DD = nullptr;
+ /// A handler that supports pseudo probe emission with embedded inline
+ /// context.
+ PseudoProbeHandler *PP = nullptr;
+
/// If the current module uses dwarf CFI annotations strictly for debugging.
bool isCFIMoveForDebugging = false;
@@ -193,6 +226,14 @@
uint16_t getDwarfVersion() const;
void setDwarfVersion(uint16_t Version);
+ bool isDwarf64() const;
+
+ /// Returns 4 for DWARF32 and 8 for DWARF64.
+ unsigned int getDwarfOffsetByteSize() const;
+
+ /// Returns 4 for DWARF32 and 12 for DWARF64.
+ unsigned int getUnitLengthFieldByteSize() const;
+
bool isPositionIndependent() const;
/// Return true if assembly output should contain comments.
@@ -207,7 +248,10 @@
MCSymbol *getFunctionBegin() const { return CurrentFnBegin; }
MCSymbol *getFunctionEnd() const { return CurrentFnEnd; }
- MCSymbol *getCurExceptionSym();
+
+ // Return the exception symbol associated with the MBB section containing a
+ // given basic block.
+ MCSymbol *getMBBExceptionSym(const MachineBasicBlock &MBB);
/// Return information about object file lowering.
const TargetLoweringObjectFile &getObjFileLowering() const;
@@ -234,6 +278,11 @@
MCSymbol *getSymbol(const GlobalValue *GV) const;
+ /// Similar to getSymbol() but preferred for references. On ELF, this uses a
+ /// local symbol if a reference to GV is guaranteed to be resolved to the
+ /// definition in the same module.
+ MCSymbol *getSymbolPreferLocal(const GlobalValue &GV) const;
+
//===------------------------------------------------------------------===//
// XRay instrumentation implementation.
//===------------------------------------------------------------------===//
@@ -260,15 +309,12 @@
const class Function *Fn;
uint8_t Version;
- void emit(int, MCStreamer *, const MCSymbol *) const;
+ void emit(int, MCStreamer *) const;
};
// All the sleds to be emitted.
SmallVector<XRayFunctionEntry, 4> Sleds;
- // A unique ID used for ELF sections associated with a particular function.
- unsigned XRayFnUniqueID = 0;
-
// Helper function to record a given XRay sled.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind,
uint8_t Version = 0);
@@ -276,6 +322,8 @@
/// Emit a table with all XRay instrumentation points.
void emitXRayTable();
+ void emitPatchableFunctionEntries();
+
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//
@@ -294,7 +342,7 @@
/// Emit the specified function out to the OutStreamer.
bool runOnMachineFunction(MachineFunction &MF) override {
SetupMachineFunction(MF);
- EmitFunctionBody();
+ emitFunctionBody();
return false;
}
@@ -304,10 +352,10 @@
/// This should be called when a new MachineFunction is being processed from
/// runOnMachineFunction.
- void SetupMachineFunction(MachineFunction &MF);
+ virtual void SetupMachineFunction(MachineFunction &MF);
/// This method emits the body and trailer for a function.
- void EmitFunctionBody();
+ void emitFunctionBody();
void emitCFIInstruction(const MachineInstr &MI);
@@ -315,7 +363,11 @@
void emitStackSizeSection(const MachineFunction &MF);
- void emitRemarksSection(Module &M);
+ void emitBBAddrMapSection(const MachineFunction &MF);
+
+ void emitPseudoProbe(const MachineInstr &MI);
+
+ void emitRemarksSection(remarks::RemarkStreamer &RS);
enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
CFIMoveType needsCFIMoves() const;
@@ -329,31 +381,56 @@
/// Print to the current output stream assembly representations of the
/// constants in the constant pool MCP. This is used to print out constants
/// which have been "spilled to memory" by the code generator.
- virtual void EmitConstantPool();
+ virtual void emitConstantPool();
/// Print assembly representations of the jump tables used by the current
/// function to the current output stream.
- virtual void EmitJumpTableInfo();
+ virtual void emitJumpTableInfo();
/// Emit the specified global variable to the .s file.
- virtual void EmitGlobalVariable(const GlobalVariable *GV);
+ virtual void emitGlobalVariable(const GlobalVariable *GV);
/// Check to see if the specified global is a special global used by LLVM. If
/// so, emit it and return true, otherwise do nothing and return false.
- bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+ bool emitSpecialLLVMGlobal(const GlobalVariable *GV);
- /// Emit an alignment directive to the specified power of two boundary. For
- /// example, if you pass in 3 here, you will get an 8 byte alignment. If a
+ /// `llvm.global_ctors` and `llvm.global_dtors` are arrays of Structor
+ /// structs.
+ ///
+ /// Priority - init priority
+ /// Func - global initialization or global clean-up function
+ /// ComdatKey - associated data
+ struct Structor {
+ int Priority = 0;
+ Constant *Func = nullptr;
+ GlobalValue *ComdatKey = nullptr;
+
+ Structor() = default;
+ };
+
+ /// This method gathers an array of Structors and then sorts them out by
+ /// Priority.
+ /// @param List The initializer of `llvm.global_ctors` or `llvm.global_dtors`
+ /// array.
+ /// @param[out] Structors Sorted Structor structs by Priority.
+ void preprocessXXStructorList(const DataLayout &DL, const Constant *List,
+ SmallVector<Structor, 8> &Structors);
+
+ /// This method emits `llvm.global_ctors` or `llvm.global_dtors` list.
+ virtual void emitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool IsCtor);
+
+ /// Emit an alignment directive to the specified power of two boundary. If a
/// global value is specified, and if that global has an explicit alignment
/// requested, it will override the alignment request if required for
/// correctness.
- void EmitAlignment(unsigned NumBits, const GlobalObject *GV = nullptr) const;
+ void emitAlignment(Align Alignment, const GlobalObject *GV = nullptr) const;
/// Lower the specified LLVM Constant to an MCExpr.
virtual const MCExpr *lowerConstant(const Constant *CV);
/// Print a general LLVM constant to the .s file.
- void EmitGlobalConstant(const DataLayout &DL, const Constant *CV);
+ void emitGlobalConstant(const DataLayout &DL, const Constant *CV);
/// Unnamed constant global variables solely containing a pointer to
/// another global variable act like a global variable "proxy", or GOT
@@ -377,50 +454,59 @@
// Overridable Hooks
//===------------------------------------------------------------------===//
+ void addAsmPrinterHandler(HandlerInfo Handler) {
+ Handlers.insert(Handlers.begin(), std::move(Handler));
+ NumUserHandlers++;
+ }
+
// Targets can, or in the case of EmitInstruction, must implement these to
// customize output.
/// This virtual method can be overridden by targets that want to emit
/// something at the start of their file.
- virtual void EmitStartOfAsmFile(Module &) {}
+ virtual void emitStartOfAsmFile(Module &) {}
/// This virtual method can be overridden by targets that want to emit
/// something at the end of their file.
- virtual void EmitEndOfAsmFile(Module &) {}
+ virtual void emitEndOfAsmFile(Module &) {}
/// Targets can override this to emit stuff before the first basic block in
/// the function.
- virtual void EmitFunctionBodyStart() {}
+ virtual void emitFunctionBodyStart() {}
/// Targets can override this to emit stuff after the last basic block in the
/// function.
- virtual void EmitFunctionBodyEnd() {}
+ virtual void emitFunctionBodyEnd() {}
/// Targets can override this to emit stuff at the start of a basic block.
/// By default, this method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing it
/// if appropriate.
- virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const;
+ virtual void emitBasicBlockStart(const MachineBasicBlock &MBB);
/// Targets can override this to emit stuff at the end of a basic block.
- virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB);
+ virtual void emitBasicBlockEnd(const MachineBasicBlock &MBB);
/// Targets should implement this to emit instructions.
- virtual void EmitInstruction(const MachineInstr *) {
+ virtual void emitInstruction(const MachineInstr *) {
llvm_unreachable("EmitInstruction not implemented");
}
/// Return the symbol for the specified constant pool entry.
virtual MCSymbol *GetCPISymbol(unsigned CPID) const;
- virtual void EmitFunctionEntryLabel();
+ virtual void emitFunctionEntryLabel();
- virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+ virtual void emitFunctionDescriptor() {
+ llvm_unreachable("Function descriptor is target-specific.");
+ }
+
+ virtual void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
/// Targets can override this to change how global constants that are part of
/// a C++ static/global constructor list are emitted.
- virtual void EmitXXStructor(const DataLayout &DL, const Constant *CV) {
- EmitGlobalConstant(DL, CV);
+ virtual void emitXXStructor(const DataLayout &DL, const Constant *CV) {
+ emitGlobalConstant(DL, CV);
}
/// Return true if the basic block has exactly one predecessor and the control
@@ -433,6 +519,9 @@
/// instructions in verbose mode.
virtual void emitImplicitDef(const MachineInstr *MI) const;
+ /// Emit N NOP instructions.
+ void emitNops(unsigned N);
+
//===------------------------------------------------------------------===//
// Symbol Lowering Routines.
//===------------------------------------------------------------------===//
@@ -481,49 +570,47 @@
/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
/// .set if it is available.
- void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ void emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const;
/// Emit something like ".uleb128 Hi-Lo".
- void EmitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+ void emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
const MCSymbol *Lo) const;
/// Emit something like ".long Label+Offset" where the size in bytes of the
/// directive is specified by Size and Label specifies the label. This
/// implicitly uses .set if it is available.
- void EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ void emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size, bool IsSectionRelative = false) const;
/// Emit something like ".long Label" where the size in bytes of the directive
/// is specified by Size and Label specifies the label.
- void EmitLabelReference(const MCSymbol *Label, unsigned Size,
+ void emitLabelReference(const MCSymbol *Label, unsigned Size,
bool IsSectionRelative = false) const {
- EmitLabelPlusOffset(Label, 0, Size, IsSectionRelative);
+ emitLabelPlusOffset(Label, 0, Size, IsSectionRelative);
}
- /// Emit something like ".long Label + Offset".
- void EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const;
-
//===------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===------------------------------------------------------------------===//
/// Emit the specified signed leb128 value.
- void EmitSLEB128(int64_t Value, const char *Desc = nullptr) const;
+ void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
/// Emit the specified unsigned leb128 value.
- void EmitULEB128(uint64_t Value, const char *Desc = nullptr, unsigned PadTo = 0) const;
+ void emitULEB128(uint64_t Value, const char *Desc = nullptr,
+ unsigned PadTo = 0) const;
/// Emit a .byte 42 directive that corresponds to an encoding. If verbose
/// assembly output is enabled, we output comments describing the encoding.
/// Desc is a string saying what the encoding is specifying (e.g. "LSDA").
- void EmitEncodingByte(unsigned Val, const char *Desc = nullptr) const;
+ void emitEncodingByte(unsigned Val, const char *Desc = nullptr) const;
/// Return the size of the encoding in bytes.
unsigned GetSizeOfEncodedValue(unsigned Encoding) const;
/// Emit reference to a ttype global with a specified encoding.
- void EmitTTypeReference(const GlobalValue *GV, unsigned Encoding) const;
+ virtual void emitTTypeReference(const GlobalValue *GV, unsigned Encoding);
/// Emit a reference to a symbol for use in dwarf. Different object formats
/// represent this in different ways. Some use a relocation others encode
@@ -531,18 +618,45 @@
void emitDwarfSymbolReference(const MCSymbol *Label,
bool ForceOffset = false) const;
- /// Emit the 4-byte offset of a string from the start of its section.
+ /// Emit the 4- or 8-byte offset of a string from the start of its section.
///
/// When possible, emit a DwarfStringPool section offset without any
/// relocations, and without using the symbol. Otherwise, defers to \a
/// emitDwarfSymbolReference().
+ ///
+ /// The length of the emitted value depends on the DWARF format.
void emitDwarfStringOffset(DwarfStringPoolEntry S) const;
- /// Emit the 4-byte offset of a string from the start of its section.
+ /// Emit the 4- or 8-byte offset of a string from the start of its section.
void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
emitDwarfStringOffset(S.getEntry());
}
+ /// Emit something like ".long Label + Offset" or ".quad Label + Offset"
+ /// depending on the DWARF format.
+ void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const;
+
+ /// Emit 32- or 64-bit value depending on the DWARF format.
+ void emitDwarfLengthOrOffset(uint64_t Value) const;
+
+ /// Emit a special value of 0xffffffff if producing 64-bit debugging info.
+ void maybeEmitDwarf64Mark() const;
+
+ /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen
+ /// according to the settings.
+ void emitDwarfUnitLength(uint64_t Length, const Twine &Comment) const;
+
+ /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen
+ /// according to the settings.
+ void emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo,
+ const Twine &Comment) const;
+
+ /// Emit a reference to a call site with a specified encoding.
+ void emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Encoding) const;
+ /// Emit an integer value corresponding to the call site encoding.
+ void emitCallSiteValue(uint64_t Value, unsigned Encoding) const;
+
/// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
virtual unsigned getISAEncoding() { return 0; }
@@ -550,7 +664,7 @@
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
- virtual void EmitDebugValue(const MCExpr *Value, unsigned Size) const;
+ virtual void emitDebugValue(const MCExpr *Value, unsigned Size) const;
//===------------------------------------------------------------------===//
// Dwarf Lowering Routines
@@ -566,7 +680,7 @@
emitDwarfAbbrev(*Abbrev);
// Mark end of abbreviations.
- EmitULEB128(0, "EOM(3)");
+ emitULEB128(0, "EOM(3)");
}
void emitDwarfAbbrev(const DIEAbbrev &Abbrev) const;
@@ -622,12 +736,16 @@
/// This emits visibility information about symbol, if this is supported by
/// the target.
- void EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ void emitVisibility(MCSymbol *Sym, unsigned Visibility,
bool IsDefinition = true) const;
/// This emits linkage information about \p GVSym based on \p GV, if this is
/// supported by the target.
- void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
+ virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
+
+ /// Return the alignment for the specified \p GV.
+ static Align getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
+ Align InAlign = Align(1));
private:
/// Private state for PrintSpecial()
@@ -637,18 +755,21 @@
mutable unsigned Counter = ~0U;
/// This method emits the header for the current function.
- virtual void EmitFunctionHeader();
+ virtual void emitFunctionHeader();
+
+ /// This method emits a comment next to the header for the current function.
+ virtual void emitFunctionHeaderComment();
/// Emit a blob of inline asm to the output streamer.
void
- EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+ emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
const MCTargetOptions &MCOptions,
const MDNode *LocMDNode = nullptr,
InlineAsm::AsmDialect AsmDialect = InlineAsm::AD_ATT) const;
/// This method formats and emits the specified machine instruction that is an
/// inline asm.
- void EmitInlineAsm(const MachineInstr *MI) const;
+ void emitInlineAsm(const MachineInstr *MI) const;
/// Add inline assembly info to the diagnostics machinery, so we can
/// emit file and position info. Returns SrcMgr memory buffer position.
@@ -659,21 +780,20 @@
// Internal Implementation Details
//===------------------------------------------------------------------===//
- void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned uid) const;
- void EmitLLVMUsedList(const ConstantArray *InitList);
+ void emitLLVMUsedList(const ConstantArray *InitList);
/// Emit llvm.ident metadata in an '.ident' directive.
- void EmitModuleIdents(Module &M);
+ void emitModuleIdents(Module &M);
/// Emit bytes for llvm.commandline metadata.
- void EmitModuleCommandLines(Module &M);
- void EmitXXStructorList(const DataLayout &DL, const Constant *List,
- bool isCtor);
+ void emitModuleCommandLines(Module &M);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S);
/// Emit GlobalAlias or GlobalIFunc.
void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS);
- void setupCodePaddingContext(const MachineBasicBlock &MBB,
- MCCodePaddingContext &Context) const;
+
+ /// This method decides whether the specified basic block requires a label.
+ bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const;
};
} // end namespace llvm
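
Most of the AsmPrinter churn above is the Emit*-to-emit* rename. These methods are virtual, so out-of-tree printers must rename their overrides or the override keyword stops compiling. A hedged sketch of a hypothetical target printer (MyTargetAsmPrinter is illustrative; the protected constructor signature matches the upstream header):

#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"

using namespace llvm;

namespace {
class MyTargetAsmPrinter : public AsmPrinter {
public:
  MyTargetAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
      : AsmPrinter(TM, std::move(Streamer)) {}

  // Previously EmitFunctionBodyStart(); the lowercase spelling is now required.
  void emitFunctionBodyStart() override {
    OutStreamer->emitRawComment(" function body begins here");
  }

  // Previously EmitInstruction(); lowering to an MCInst is elided here.
  void emitInstruction(const MachineInstr *MI) override {}
};
} // end anonymous namespace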
diff --git a/linux-x64/clang/include/llvm/CodeGen/AsmPrinterHandler.h b/linux-x64/clang/include/llvm/CodeGen/AsmPrinterHandler.h
index affb558..dc81a30 100644
--- a/linux-x64/clang/include/llvm/CodeGen/AsmPrinterHandler.h
+++ b/linux-x64/clang/include/llvm/CodeGen/AsmPrinterHandler.h
@@ -23,8 +23,10 @@
class MachineFunction;
class MachineInstr;
class MCSymbol;
+class Module;
-typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm);
+typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB);
/// Collects and handles AsmPrinter objects required to build debug
/// or EH information.
@@ -36,6 +38,8 @@
/// this tracks that size.
virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;
+ virtual void beginModule(Module *M) {}
+
/// Emit all sections that should come after the content.
virtual void endModule() = 0;
@@ -67,7 +71,14 @@
/// Process end of an instruction.
virtual void endInstruction() = 0;
+
+ /// Process beginning of a basic block during basic block sections.
+ virtual void beginBasicBlock(const MachineBasicBlock &MBB) {}
+
+ /// Process end of a basic block during basic block sections.
+ virtual void endBasicBlock(const MachineBasicBlock &MBB) {}
};
+
} // End of namespace llvm
#endif
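
The beginBasicBlock()/endBasicBlock() hooks above were added for basic block sections. A sketch of a handler that uses them, assuming a hypothetical SectionTraceHandler; the pure-virtual surface is unchanged and must still be implemented:

#include "llvm/CodeGen/AsmPrinterHandler.h"

using namespace llvm;

namespace {
class SectionTraceHandler : public AsmPrinterHandler {
public:
  // Unchanged pure-virtual interface.
  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
  void endModule() override {}
  void beginFunction(const MachineFunction *MF) override {}
  void endFunction(const MachineFunction *MF) override {}
  void beginInstruction(const MachineInstr *MI) override {}
  void endInstruction() override {}

  // New optional hooks: fire when a basic block starts or ends its own
  // section under basic block sections.
  void beginBasicBlock(const MachineBasicBlock &MBB) override {}
  void endBasicBlock(const MachineBasicBlock &MBB) override {}
};
} // end anonymous namespace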
diff --git a/linux-x64/clang/include/llvm/CodeGen/BasicBlockSectionUtils.h b/linux-x64/clang/include/llvm/CodeGen/BasicBlockSectionUtils.h
new file mode 100644
index 0000000..d8da3be
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/BasicBlockSectionUtils.h
@@ -0,0 +1,30 @@
+//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+
+extern cl::opt<std::string> BBSectionsColdTextPrefix;
+
+class MachineFunction;
+class MachineBasicBlock;
+
+using MachineBasicBlockComparator =
+ function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>;
+
+void sortBasicBlocksAndUpdateBranches(MachineFunction &MF,
+ MachineBasicBlockComparator MBBCmp);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
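
A short usage sketch for the new utility; the pass function sortColdBlocksLast() is hypothetical, and it assumes section IDs were already assigned to blocks (getSectionIDNum() is the MachineBasicBlock accessor in this release):

#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

// Reorder blocks by their section ID so same-section blocks are contiguous,
// then let the utility rewrite branches broken by the new layout. The
// comparator must be a strict weak ordering.
static void sortColdBlocksLast(MachineFunction &MF) {
  sortBasicBlocksAndUpdateBranches(
      MF, [](const MachineBasicBlock &A, const MachineBasicBlock &B) {
        return A.getSectionIDNum() < B.getSectionIDNum();
      });
}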
diff --git a/linux-x64/clang/include/llvm/CodeGen/BasicTTIImpl.h b/linux-x64/clang/include/llvm/CodeGen/BasicTTIImpl.h
index 173be72..9776c20 100644
--- a/linux-x64/clang/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/linux-x64/clang/include/llvm/CodeGen/BasicTTIImpl.h
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -41,7 +40,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -79,27 +77,26 @@
using BaseT = TargetTransformInfoImplCRTPBase<T>;
using TTI = TargetTransformInfo;
+ /// Helper function to access this as a T.
+ T *thisT() { return static_cast<T *>(this); }
+
/// Estimate a cost of Broadcast as an extract and sequence of insert
/// operations.
- unsigned getBroadcastShuffleOverhead(Type *Ty) {
- assert(Ty->isVectorTy() && "Can only shuffle vectors");
+ unsigned getBroadcastShuffleOverhead(FixedVectorType *VTy) {
unsigned Cost = 0;
// Broadcast cost is equal to the cost of extracting the zero'th element
// plus the cost of inserting it into every element of the result vector.
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::ExtractElement, Ty, 0);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::InsertElement, Ty, i);
+ for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
}
return Cost;
}
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
- unsigned getPermuteShuffleOverhead(Type *Ty) {
- assert(Ty->isVectorTy() && "Can only shuffle vectors");
+ unsigned getPermuteShuffleOverhead(FixedVectorType *VTy) {
unsigned Cost = 0;
// Shuffle cost is equal to the cost of extracting element from its argument
// plus the cost of inserting them onto the result vector.
@@ -108,22 +105,23 @@
// index 0 of first vector, index 1 of second vector,index 2 of first
// vector and finally index 3 of second vector and insert them at index
// <0,1,2,3> of result vector.
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
}
return Cost;
}
/// Estimate a cost of subvector extraction as a sequence of extract and
/// insert operations.
- unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
- assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
+ FixedVectorType *SubVTy) {
+ assert(VTy && SubVTy &&
"Can only extract subvectors from vectors");
- int NumSubElts = SubTy->getVectorNumElements();
- assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
+ int NumSubElts = SubVTy->getNumElements();
+ assert((!isa<FixedVectorType>(VTy) ||
+ (Index + NumSubElts) <=
+ (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
"SK_ExtractSubvector index out of range");
unsigned Cost = 0;
@@ -131,21 +129,24 @@
// the source type plus the cost of inserting them into the result vector
// type.
for (int i = 0; i != NumSubElts; ++i) {
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::ExtractElement, Ty, i + Index);
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::InsertElement, SubTy, i);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+ i + Index);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
}
return Cost;
}
/// Estimate a cost of subvector insertion as a sequence of extract and
/// insert operations.
- unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
- assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
+ FixedVectorType *SubVTy) {
+ assert(VTy && SubVTy &&
"Can only insert subvectors into vectors");
- int NumSubElts = SubTy->getVectorNumElements();
- assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
+ int NumSubElts = SubVTy->getNumElements();
+ assert((!isa<FixedVectorType>(VTy) ||
+ (Index + NumSubElts) <=
+ (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
"SK_InsertSubvector index out of range");
unsigned Cost = 0;
@@ -153,10 +154,10 @@
// the source type plus the cost of inserting them into the result vector
// type.
for (int i = 0; i != NumSubElts; ++i) {
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::ExtractElement, SubTy, i);
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::InsertElement, Ty, i + Index);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
+ i + Index);
}
return Cost;
}
@@ -190,6 +191,7 @@
protected:
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
: BaseT(DL) {}
+ virtual ~BasicTTIImplBase() = default;
using TargetTransformInfoImplBase::DL;
@@ -206,6 +208,8 @@
bool hasBranchDivergence() { return false; }
+ bool useGPUDivergenceAnalysis() { return false; }
+
bool isSourceOfDivergence(const Value *V) { return false; }
bool isAlwaysUniform(const Value *V) { return false; }
@@ -215,6 +219,24 @@
return -1;
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ return false;
+ }
+
+ bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
+ return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
+ }
+
+ unsigned getAssumedAddrSpace(const Value *V) const {
+ return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
+ }
+
+ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const {
+ return nullptr;
+ }
+
bool isLegalAddImmediate(int64_t imm) {
return getTLI()->isLegalAddImmediate(imm);
}
@@ -250,6 +272,14 @@
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}
+ bool isNumRegsMajorCostOfLSR() {
+ return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
+ }
+
+ bool isProfitableLSRChainElement(Instruction *I) {
+ return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
+ }
+
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
TargetLoweringBase::AddrMode AM;
@@ -275,49 +305,21 @@
return getTLI()->isTypeLegal(VT);
}
+ unsigned getRegUsageForType(Type *Ty) {
+ return getTLI()->getTypeLegalizationCost(DL, Ty).first;
+ }
+
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) {
return BaseT::getGEPCost(PointeeType, Ptr, Operands);
}
- int getExtCost(const Instruction *I, const Value *Src) {
- if (getTLI()->isExtFree(I))
- return TargetTransformInfo::TCC_Free;
-
- if (isa<ZExtInst>(I) || isa<SExtInst>(I))
- if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
- if (getTLI()->isExtLoad(LI, I, DL))
- return TargetTransformInfo::TCC_Free;
-
- return TargetTransformInfo::TCC_Basic;
- }
-
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments, const User *U) {
- return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
- }
-
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys, const User *U) {
- if (IID == Intrinsic::cttz) {
- if (getTLI()->isCheapToSpeculateCttz())
- return TargetTransformInfo::TCC_Basic;
- return TargetTransformInfo::TCC_Expensive;
- }
-
- if (IID == Intrinsic::ctlz) {
- if (getTLI()->isCheapToSpeculateCtlz())
- return TargetTransformInfo::TCC_Basic;
- return TargetTransformInfo::TCC_Expensive;
- }
-
- return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
- }
-
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JumpTableSize) {
+ unsigned &JumpTableSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
/// Try to find the estimated number of clusters. Note that the number of
- /// clusters identified in this function could be different from the actural
+ /// clusters identified in this function could be different from the actual
/// numbers found in lowering. This function ignores switches that are
/// lowered with a mix of jump table / bit test / BTree. This function was
/// initially intended to be used when estimating the cost of switch in
@@ -363,7 +365,7 @@
(MaxCaseVal - MinCaseVal)
.getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
// Check whether a range of clusters is dense enough for a jump table
- if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
+ if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
JumpTableSize = Range;
return 1;
}
@@ -371,10 +373,6 @@
return N;
}
- unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
-
- unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
-
bool shouldBuildLookupTables() {
const TargetLoweringBase *TLI = getTLI();
return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
@@ -402,31 +400,10 @@
return TargetTransformInfo::TCC_Expensive;
}
- unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
- const TargetLoweringBase *TLI = getTLI();
- switch (Opcode) {
- default: break;
- case Instruction::Trunc:
- if (TLI->isTruncateFree(OpTy, Ty))
- return TargetTransformInfo::TCC_Free;
- return TargetTransformInfo::TCC_Basic;
- case Instruction::ZExt:
- if (TLI->isZExtFree(OpTy, Ty))
- return TargetTransformInfo::TCC_Free;
- return TargetTransformInfo::TCC_Basic;
-
- case Instruction::AddrSpaceCast:
- if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
- Ty->getPointerAddressSpace()))
- return TargetTransformInfo::TCC_Free;
- return TargetTransformInfo::TCC_Basic;
- }
-
- return BaseT::getOperationCost(Opcode, Ty, OpTy);
- }
-
unsigned getInliningThresholdMultiplier() { return 1; }
+ int getInlinerVectorBonusPercent() { return 150; }
+
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// This unrolling functionality is target independent, but to provide some
@@ -462,20 +439,17 @@
return;
// Scan the loop: don't unroll loops with calls.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
- ++I) {
- BasicBlock *BB = *I;
-
- for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
- if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
- ImmutableCallSite CS(&*J);
- if (const Function *F = CS.getCalledFunction()) {
- if (!static_cast<T *>(this)->isLoweredToCall(F))
+ for (BasicBlock *BB : L->blocks()) {
+ for (Instruction &I : *BB) {
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+ if (!thisT()->isLoweredToCall(F))
continue;
}
return;
}
+ }
}
// Enable runtime and partial unrolling up to the specified size.
@@ -492,6 +466,14 @@
UP.BEInsns = 2;
}
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ PP.PeelCount = 0;
+ PP.AllowPeeling = true;
+ PP.AllowLoopNestsPeeling = false;
+ PP.PeelProfiledIterations = true;
+ }
+
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
@@ -499,6 +481,41 @@
return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) {
+ return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+ }
+
+ bool emitGetActiveLaneMask() {
+ return BaseT::emitGetActiveLaneMask();
+ }
+
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) {
+ return BaseT::instCombineIntrinsic(IC, II);
+ }
+
+ Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II,
+ APInt DemandedMask,
+ KnownBits &Known,
+ bool &KnownBitsComputed) {
+ return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
+ KnownBitsComputed);
+ }
+
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) {
+ return BaseT::simplifyDemandedVectorEltsIntrinsic(
+ IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ SimplifyAndSetOp);
+ }
+
int getInstructionLatency(const Instruction *I) {
if (isa<LoadInst>(I))
return getST()->getSchedModel().DefaultLoadLatency;
@@ -506,34 +523,92 @@
return BaseT::getInstructionLatency(I);
}
+ virtual Optional<unsigned>
+ getCacheSize(TargetTransformInfo::CacheLevel Level) const {
+ return Optional<unsigned>(
+ getST()->getCacheSize(static_cast<unsigned>(Level)));
+ }
+
+ virtual Optional<unsigned>
+ getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
+ Optional<unsigned> TargetResult =
+ getST()->getCacheAssociativity(static_cast<unsigned>(Level));
+
+ if (TargetResult)
+ return TargetResult;
+
+ return BaseT::getCacheAssociativity(Level);
+ }
+
+ virtual unsigned getCacheLineSize() const {
+ return getST()->getCacheLineSize();
+ }
+
+ virtual unsigned getPrefetchDistance() const {
+ return getST()->getPrefetchDistance();
+ }
+
+ virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const {
+ return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
+ NumPrefetches, HasCall);
+ }
+
+ virtual unsigned getMaxPrefetchIterationsAhead() const {
+ return getST()->getMaxPrefetchIterationsAhead();
+ }
+
+ virtual bool enableWritePrefetching() const {
+ return getST()->enableWritePrefetching();
+ }
+
/// @}
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
-
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
+ Optional<unsigned> getMaxVScale() const { return None; }
+
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
- assert(Ty->isVectorTy() && "Can only scalarize vectors");
+ /// are set if the demanded result elements need to be inserted and/or
+ /// extracted from vectors.
+ unsigned getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts,
+ bool Insert, bool Extract) {
+ /// FIXME: a bitfield is not a reasonable abstraction for talking about
+ /// which elements are needed from a scalable vector
+ auto *Ty = cast<FixedVectorType>(InTy);
+
+ assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
+ "Vector size mismatch");
+
unsigned Cost = 0;
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
if (Insert)
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
if (Extract)
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
}
return Cost;
}
- /// Estimate the overhead of scalarizing an instructions unique
+ /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
+ unsigned getScalarizationOverhead(VectorType *InTy, bool Insert,
+ bool Extract) {
+ auto *Ty = cast<FixedVectorType>(InTy);
+
+ APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
+ return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+ }
+
+ /// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The types of the arguments are ordinarily
/// scalar, in which case the costs are multiplied with VF.
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
@@ -541,16 +616,22 @@
unsigned Cost = 0;
SmallPtrSet<const Value*, 4> UniqueOperands;
for (const Value *A : Args) {
+ // Disregard things like metadata arguments.
+ Type *Ty = A->getType();
+ if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
+ !Ty->isPtrOrPtrVectorTy())
+ continue;
+
if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
- Type *VecTy = nullptr;
- if (A->getType()->isVectorTy()) {
- VecTy = A->getType();
+ auto *VecTy = dyn_cast<VectorType>(Ty);
+ if (VecTy) {
// If A is a vector operand, VF should be 1 or correspond to A.
- assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
+ assert((VF == 1 ||
+ VF == cast<FixedVectorType>(VecTy)->getNumElements()) &&
"Vector argument does not match VF");
}
else
- VecTy = VectorType::get(A->getType(), VF);
+ VecTy = FixedVectorType::get(Ty, VF);
Cost += getScalarizationOverhead(VecTy, false, true);
}
@@ -559,19 +640,19 @@
return Cost;
}
- unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
- assert(VecTy->isVectorTy());
+ unsigned getScalarizationOverhead(VectorType *InTy,
+ ArrayRef<const Value *> Args) {
+ auto *Ty = cast<FixedVectorType>(InTy);
unsigned Cost = 0;
- Cost += getScalarizationOverhead(VecTy, true, false);
+ Cost += getScalarizationOverhead(Ty, true, false);
if (!Args.empty())
- Cost += getOperandsScalarizationOverhead(Args,
- VecTy->getVectorNumElements());
+ Cost += getOperandsScalarizationOverhead(Args, Ty->getNumElements());
else
// When no information on arguments is provided, we add the cost
// associated with one argument as a heuristic.
- Cost += getScalarizationOverhead(VecTy, false, true);
+ Cost += getScalarizationOverhead(Ty, false, true);
return Cost;
}
@@ -580,16 +661,25 @@
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) {
// Check if any of the operands are vector operands.
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
+ Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo,
+ Args, CxtI);
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
bool IsFloat = Ty->isFPOrFPVectorTy();
@@ -612,91 +702,115 @@
// Else, assume that we need to scalarize this op.
// TODO: If one of the types get legalized by splitting, handle this
// similarly to what getCastInstrCost() does.
- if (Ty->isVectorTy()) {
- unsigned Num = Ty->getVectorNumElements();
- unsigned Cost = static_cast<T *>(this)
- ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+ unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
+ unsigned Cost = thisT()->getArithmeticInstrCost(
+ Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return getScalarizationOverhead(Ty, Args) + Num * Cost;
+ return getScalarizationOverhead(VTy, Args) + Num * Cost;
}
// We don't know anything about this scalar instruction.
return OpCost;
}
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp) {
+
switch (Kind) {
case TTI::SK_Broadcast:
- return getBroadcastShuffleOverhead(Tp);
+ return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
case TTI::SK_Select:
case TTI::SK_Reverse:
case TTI::SK_Transpose:
case TTI::SK_PermuteSingleSrc:
case TTI::SK_PermuteTwoSrc:
- return getPermuteShuffleOverhead(Tp);
+ return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
case TTI::SK_ExtractSubvector:
- return getExtractSubvectorOverhead(Tp, Index, SubTp);
+ return getExtractSubvectorOverhead(Tp, Index,
+ cast<FixedVectorType>(SubTp));
case TTI::SK_InsertSubvector:
- return getInsertSubvectorOverhead(Tp, Index, SubTp);
+ return getInsertSubvectorOverhead(Tp, Index,
+ cast<FixedVectorType>(SubTp));
}
llvm_unreachable("Unknown TTI::ShuffleKind");
}
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
+ if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
+ return 0;
+
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
- // Check for NOOP conversions.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+ TypeSize SrcSize = SrcLT.second.getSizeInBits();
+ TypeSize DstSize = DstLT.second.getSizeInBits();
+ bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
+ bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();
- // Bitcast between types that are legalized to the same type are free.
- if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Trunc:
+ // Check for NOOP conversions.
+ if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
return 0;
- }
+ LLVM_FALLTHROUGH;
+ case Instruction::BitCast:
+ // Bitcast between types that are legalized to the same type are free and
+ // assume int to/from ptr of the same size is also free.
+ if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
+ SrcSize == DstSize)
+ return 0;
+ break;
+ case Instruction::FPExt:
+ if (I && getTLI()->isExtFree(I))
+ return 0;
+ break;
+ case Instruction::ZExt:
+ if (TLI->isZExtFree(SrcLT.second, DstLT.second))
+ return 0;
+ LLVM_FALLTHROUGH;
+ case Instruction::SExt:
+ if (I && getTLI()->isExtFree(I))
+ return 0;
- if (Opcode == Instruction::Trunc &&
- TLI->isTruncateFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::ZExt &&
- TLI->isZExtFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::AddrSpaceCast &&
- TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
- Dst->getPointerAddressSpace()))
- return 0;
-
- // If this is a zext/sext of a load, return 0 if the corresponding
- // extending load exists on target.
- if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- I && isa<LoadInst>(I->getOperand(0))) {
+ // If this is a zext/sext of a load, return 0 if the corresponding
+ // extending load exists on target.
+ if (CCH == TTI::CastContextHint::Normal) {
EVT ExtVT = EVT::getEVT(Dst);
EVT LoadVT = EVT::getEVT(Src);
unsigned LType =
((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
return 0;
+ }
+ break;
+ case Instruction::AddrSpaceCast:
+ if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
+ Dst->getPointerAddressSpace()))
+ return 0;
+ break;
}
+ auto *SrcVTy = dyn_cast<VectorType>(Src);
+ auto *DstVTy = dyn_cast<VectorType>(Dst);
+
// If the cast is marked as legal (or promote) then assume low cost.
if (SrcLT.first == DstLT.first &&
TLI->isOperationLegalOrPromote(ISD, DstLT.second))
- return 1;
+ return SrcLT.first;
// Handle scalar conversions.
- if (!Src->isVectorTy() && !Dst->isVectorTy()) {
- // Scalar bitcasts are usually free.
- if (Opcode == Instruction::BitCast)
- return 0;
-
+ if (!SrcVTy && !DstVTy) {
// Just check the op cost. If the operation is legal then assume it costs
// 1.
if (!TLI->isOperationExpand(ISD, DstLT.second))
@@ -707,18 +821,17 @@
}
// Check vector-to-vector casts.
- if (Dst->isVectorTy() && Src->isVectorTy()) {
+ if (DstVTy && SrcVTy) {
// If the cast is between same-sized registers, then the check is simple.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+ if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
// Assume that Zext is done using AND.
if (Opcode == Instruction::ZExt)
- return 1;
+ return SrcLT.first;
// Assume that sext is done using SHL and SRA.
if (Opcode == Instruction::SExt)
- return 2;
+ return SrcLT.first * 2;
// Just check the op cost. If the operation is legal then assume it
// costs
@@ -731,63 +844,75 @@
// of casting the original vector twice. We also need to factor in the
// cost of the split itself. Count that as 1, to be consistent with
// TLI->getTypeLegalizationCost().
- if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
- TargetLowering::TypeSplitVector) ||
- (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
- TargetLowering::TypeSplitVector)) {
- Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
- Dst->getVectorNumElements() / 2);
- Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
- Src->getVectorNumElements() / 2);
+ bool SplitSrc =
+ TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
+ TargetLowering::TypeSplitVector;
+ bool SplitDst =
+ TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
+ TargetLowering::TypeSplitVector;
+ if ((SplitSrc || SplitDst) &&
+ cast<FixedVectorType>(SrcVTy)->getNumElements() > 1 &&
+ cast<FixedVectorType>(DstVTy)->getNumElements() > 1) {
+ Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
+ Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
T *TTI = static_cast<T *>(this);
- return TTI->getVectorSplitCost() +
- (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
+ // If both types need to be split then the split is free.
+ unsigned SplitCost =
+ (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
+ return SplitCost +
+ (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
+ CostKind, I));
}
// In other cases where the source or destination are illegal, assume
// the operation will get scalarized.
- unsigned Num = Dst->getVectorNumElements();
- unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
- Opcode, Dst->getScalarType(), Src->getScalarType(), I);
+ unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
+ unsigned Cost = thisT()->getCastInstrCost(
+ Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+ return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
}
// We already handled vector-to-vector and scalar-to-scalar conversions. This
// is where we handle bitcasts between vectors and scalars. We need to assume
// that the conversion is scalarized in one way or another.
- if (Opcode == Instruction::BitCast)
+ if (Opcode == Instruction::BitCast) {
// Illegal bitcasts are done by storing and loading from a stack slot.
- return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
- : 0) +
- (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
- : 0);
+ return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
+ (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
+ }
llvm_unreachable("Unhandled cast");
}
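The split recurrence above is compact enough to state on its own. A minimal
sketch of the arithmetic, with a hypothetical helper (the name and the unit
split cost are assumptions, not this header's API):

  // Halve both vectors, pay for two half-width casts, and charge the split
  // itself (as 1, matching getVectorSplitCost()) only when at least one side
  // was not already being split by legalization.
  unsigned splitCastCost(unsigned HalfWidthCastCost, bool SplitSrc,
                         bool SplitDst) {
    unsigned SplitCost = (!SplitSrc || !SplitDst) ? 1 : 0;
    return SplitCost + 2 * HalfWidthCastCost;
  }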
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) {
- return static_cast<T *>(this)->getVectorInstrCost(
- Instruction::ExtractElement, VecTy, Index) +
- static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
- VecTy->getElementType());
+ return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+ Index) +
+ thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
+ TTI::CastContextHint::None, TTI::TCK_RecipThroughput);
}
- unsigned getCFInstrCost(unsigned Opcode) {
- // Branches are assumed to be predicted.
- return 0;
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+ return BaseT::getCFInstrCost(Opcode, CostKind);
}
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- const Instruction *I) {
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+ I);
+
// Selects on vectors are actually vector selects.
if (ISD == ISD::SELECT) {
assert(CondTy && "CondTy must exist");
@@ -806,16 +931,16 @@
// Otherwise, assume that the operation is scalarized.
// TODO: If one of the types get legalized by splitting, handle this
// similarly to what getCastInstrCost() does.
- if (ValTy->isVectorTy()) {
- unsigned Num = ValTy->getVectorNumElements();
+ if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
+ unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
if (CondTy)
CondTy = CondTy->getScalarType();
- unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
- Opcode, ValTy->getScalarType(), CondTy, I);
+ unsigned Cost = thisT()->getCmpSelInstrCost(
+ Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+ return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
}
// Unknown scalar opcode.
@@ -829,16 +954,27 @@
return LT.first;
}
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr) {
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type");
+ // Assume types, such as structs, are expensive.
+ if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
+ return 4;
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
// Assuming that all loads of legal types cost 1.
unsigned Cost = LT.first;
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
if (Src->isVectorTy() &&
- Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
+ // In practice it's not currently possible to have a change in lane
+ // length for extending loads or truncating stores, so both types should
+ // have the same scalable property.
+ TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(),
+ LT.second.getSizeInBits())) {
// This is a vector load that legalizes to a larger type than the vector
// itself. Unless the corresponding extending load or truncating store is
// legal, this will scalarize.
@@ -852,7 +988,8 @@
if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
// This is a vector load/store for some illegal type that is scalarized.
// We must account for the cost of building or decomposing the vector.
- Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
+ Cost += getScalarizationOverhead(cast<VectorType>(Src),
+ Opcode != Instruction::Store,
Opcode == Instruction::Store);
}
}
@@ -860,35 +997,76 @@
return Cost;
}
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false) {
- VectorType *VT = dyn_cast<VectorType>(VecTy);
- assert(VT && "Expect a vector type for interleaved memory op");
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
+ auto *VT = cast<FixedVectorType>(DataTy);
+ // Assume the target does not have support for gather/scatter operations
+ // and provide a rough estimate.
+ //
+ // First, compute the cost of extracting the individual addresses and the
+ // individual memory operations.
+ int LoadCost =
+ VT->getNumElements() *
+ (getVectorInstrCost(
+ Instruction::ExtractElement,
+ FixedVectorType::get(PointerType::get(VT->getElementType(), 0),
+ VT->getNumElements()),
+ -1) +
+ getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));
+
+ // Next, compute the cost of packing the result in a vector.
+ int PackingCost = getScalarizationOverhead(VT, Opcode != Instruction::Store,
+ Opcode == Instruction::Store);
+
+ int ConditionalCost = 0;
+ if (VariableMask) {
+ // Compute the cost of conditionally executing the memory operations with
+ // variable masks. This includes extracting the individual conditions,
+ // the branches, and the PHIs to combine the results.
+ // NOTE: Estimating the cost of conditionally executing the memory
+ // operations accurately is quite difficult and the current solution
+ // provides a very rough estimate only.
+ ConditionalCost =
+ VT->getNumElements() *
+ (getVectorInstrCost(
+ Instruction::ExtractElement,
+ FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
+ VT->getNumElements()),
+ -1) +
+ getCFInstrCost(Instruction::Br, CostKind) +
+ getCFInstrCost(Instruction::PHI, CostKind));
+ }
+
+ return LoadCost + PackingCost + ConditionalCost;
+ }
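Written out as plain arithmetic, the estimate above has this shape; the unit
costs are placeholders for the real TTI hooks (a sketch, not the
implementation):

  // One address extract plus one scalar memory op per lane, plus the cost of
  // packing the result, plus (for variable masks) an i1 extract, a branch,
  // and a PHI per lane.
  unsigned gatherScatterEstimate(unsigned NumElts, unsigned AddrExtractCost,
                                 unsigned ScalarMemOpCost, unsigned PackingCost,
                                 bool VariableMask, unsigned MaskExtractCost,
                                 unsigned BranchCost, unsigned PHICost) {
    unsigned MemCost = NumElts * (AddrExtractCost + ScalarMemOpCost);
    unsigned CondCost =
        VariableMask ? NumElts * (MaskExtractCost + BranchCost + PHICost) : 0;
    return MemCost + PackingCost + CondCost;
  }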
+
+ unsigned getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false) {
+ auto *VT = cast<FixedVectorType>(VecTy);
unsigned NumElts = VT->getNumElements();
assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
unsigned NumSubElts = NumElts / Factor;
- VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
+ auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);
// Firstly, the cost of load/store operation.
unsigned Cost;
if (UseMaskForCond || UseMaskForGaps)
- Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
- Opcode, VecTy, Alignment, AddressSpace);
+ Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
+ AddressSpace, CostKind);
else
- Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
- AddressSpace);
+ Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
+ CostKind);
// Legalize the vector type, and get the legalized and unlegalized type
// sizes.
MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
- unsigned VecTySize =
- static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
+ unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
unsigned VecTyLTSize = VecTyLT.getStoreSize();
// Return the ceiling of dividing A by B.
@@ -947,14 +1125,14 @@
// Extract elements from loaded vector for each sub vector.
for (unsigned i = 0; i < NumSubElts; i++)
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::ExtractElement, VT, Index + i * Factor);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
+ Index + i * Factor);
}
unsigned InsSubCost = 0;
for (unsigned i = 0; i < NumSubElts; i++)
- InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::InsertElement, SubVT, i);
+ InsSubCost +=
+ thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);
Cost += Indices.size() * InsSubCost;
} else {
@@ -969,8 +1147,8 @@
unsigned ExtSubCost = 0;
for (unsigned i = 0; i < NumSubElts; i++)
- ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::ExtractElement, SubVT, i);
+ ExtSubCost +=
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
Cost += ExtSubCost * Factor;
for (unsigned i = 0; i < NumElts; i++)
@@ -982,8 +1160,8 @@
return Cost;
Type *I8Type = Type::getInt8Ty(VT->getContext());
- VectorType *MaskVT = VectorType::get(I8Type, NumElts);
- SubVT = VectorType::get(I8Type, NumSubElts);
+ auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
+ SubVT = FixedVectorType::get(I8Type, NumSubElts);
// The Mask shuffling cost is that of extracting all the elements of the Mask
// and inserting each of them Factor times into the wide vector:
@@ -996,12 +1174,12 @@
// vector and insert them factor times into the <24xi1> shuffled mask
// vector.
for (unsigned i = 0; i < NumSubElts; i++)
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::ExtractElement, SubVT, i);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
for (unsigned i = 0; i < NumElts; i++)
- Cost += static_cast<T *>(this)->getVectorInstrCost(
- Instruction::InsertElement, MaskVT, i);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);
// The Gaps mask is invariant and created outside the loop; therefore the
// cost of creating it is not accounted for here. However, if we have both
@@ -1009,82 +1187,127 @@
// memory access, we need to account for the cost of And-ing the two masks
// inside the loop.
if (UseMaskForGaps)
- Cost += static_cast<T *>(this)->getArithmeticInstrCost(
- BinaryOperator::And, MaskVT);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
+ CostKind);
return Cost;
}
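Counting out the <24 x i1> example from the comments above: with Factor = 3
and NumElts = 24, the mask costs 8 extracts from the <8 x i1> sub-mask plus
24 inserts into the wide mask. As a hypothetical helper:

  unsigned interleavedMaskShuffleCost(unsigned NumElts, unsigned Factor,
                                      unsigned ExtractCost,
                                      unsigned InsertCost) {
    unsigned NumSubElts = NumElts / Factor;                 // 24 / 3 == 8
    return NumSubElts * ExtractCost + NumElts * InsertCost; // 8 + 24 unit ops
  }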
/// Get intrinsic cost based on arguments.
- unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF = 1) {
- unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
- assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
- auto *ConcreteTTI = static_cast<T *>(this);
+ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ // Check for generically free intrinsics.
+ if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
+ return 0;
+ // Assume that target intrinsics are cheap.
+ Intrinsic::ID IID = ICA.getID();
+ if (Function::isTargetIntrinsic(IID))
+ return TargetTransformInfo::TCC_Basic;
+
+ if (ICA.isTypeBasedOnly())
+ return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+
+ Type *RetTy = ICA.getReturnType();
+
+ ElementCount VF = ICA.getVectorFactor();
+ ElementCount RetVF =
+ (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
+ : ElementCount::getFixed(1));
+ assert((RetVF.isScalar() || VF.isScalar()) &&
+ "VF > 1 and RetVF is a vector type");
+ const IntrinsicInst *I = ICA.getInst();
+ const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
+ FastMathFlags FMF = ICA.getFlags();
switch (IID) {
- default: {
- // Assume that we need to scalarize this intrinsic.
- SmallVector<Type *, 4> Types;
- for (Value *Op : Args) {
- Type *OpTy = Op->getType();
- assert(VF == 1 || !OpTy->isVectorTy());
- Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
- }
+ default:
+ break;
- if (VF > 1 && !RetTy->isVoidTy())
- RetTy = VectorType::get(RetTy, VF);
+ case Intrinsic::cttz:
+ // FIXME: If necessary, this should go in target-specific overrides.
+ if (VF.isScalar() && RetVF.isScalar() &&
+ getTLI()->isCheapToSpeculateCttz())
+ return TargetTransformInfo::TCC_Basic;
+ break;
- // Compute the scalarization overhead based on Args for a vector
- // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
- // CostModel will pass a vector RetTy and VF is 1.
- unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
- if (RetVF > 1 || VF > 1) {
- ScalarizationCost = 0;
- if (!RetTy->isVoidTy())
- ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
- ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
- }
+ case Intrinsic::ctlz:
+ // FIXME: If necessary, this should go in target-specific overrides.
+ if (VF.isScalar() && RetVF.isScalar() &&
+ getTLI()->isCheapToSpeculateCtlz())
+ return TargetTransformInfo::TCC_Basic;
+ break;
- return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
- ScalarizationCost);
- }
+ case Intrinsic::memcpy:
+ return thisT()->getMemcpyCost(ICA.getInst());
+
case Intrinsic::masked_scatter: {
- assert(VF == 1 && "Can't vectorize types here.");
- Value *Mask = Args[3];
+ assert(VF.isScalar() && "Can't vectorize types here.");
+ const Value *Mask = Args[3];
bool VarMask = !isa<Constant>(Mask);
- unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
- return ConcreteTTI->getGatherScatterOpCost(
- Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
+ Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
+ return thisT()->getGatherScatterOpCost(Instruction::Store,
+ Args[0]->getType(), Args[1],
+ VarMask, Alignment, CostKind, I);
}
case Intrinsic::masked_gather: {
- assert(VF == 1 && "Can't vectorize types here.");
- Value *Mask = Args[2];
+ assert(VF.isScalar() && "Can't vectorize types here.");
+ const Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
- unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
- return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
- Args[0], VarMask, Alignment);
+ Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
+ return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
+ VarMask, Alignment, CostKind, I);
}
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
+ case Intrinsic::experimental_vector_extract: {
+ // FIXME: Handle case where a scalable vector is extracted from a scalable
+ // vector
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
+ return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+ cast<VectorType>(Args[0]->getType()),
+ Index, cast<VectorType>(RetTy));
+ }
+ case Intrinsic::experimental_vector_insert: {
+ // FIXME: Handle case where a scalable vector is inserted into a scalable
+ // vector
+ if (isa<ScalableVectorType>(Args[1]->getType()))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
+ return thisT()->getShuffleCost(
+ TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index,
+ cast<VectorType>(Args[1]->getType()));
+ }
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin: {
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
+ return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
+ }
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ IntrinsicCostAttributes Attrs(
+ IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
+ return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
+ }
case Intrinsic::fshl:
case Intrinsic::fshr: {
- Value *X = Args[0];
- Value *Y = Args[1];
- Value *Z = Args[2];
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ const Value *X = Args[0];
+ const Value *Y = Args[1];
+ const Value *Z = Args[2];
TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
@@ -1095,64 +1318,116 @@
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
unsigned Cost = 0;
- Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
- Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
- Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
- OpKindX, OpKindZ, OpPropsX);
- Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
- OpKindY, OpKindZ, OpPropsY);
+ Cost +=
+ thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
+ Cost +=
+ thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
// Non-constant shift amounts require a modulo.
if (OpKindZ != TTI::OK_UniformConstantValue &&
OpKindZ != TTI::OK_NonUniformConstantValue)
- Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
- OpKindZ, OpKindBW, OpPropsZ,
- OpPropsBW);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
+ CostKind, OpKindZ, OpKindBW,
+ OpPropsZ, OpPropsBW);
// For non-rotates (X != Y) we must add shift-by-zero handling costs.
if (X != Y) {
- Type *CondTy = Type::getInt1Ty(RetTy->getContext());
- if (RetVF > 1)
- CondTy = VectorType::get(CondTy, RetVF);
- Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
- CondTy, nullptr);
- Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
- CondTy, nullptr);
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
}
return Cost;
}
}
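For reference, the fshl expansion being priced above behaves like this plain
C++ on uint32_t (a sketch of the semantics, not the LLVM lowering; the guard
corresponds to the icmp/select pair, since shifting a 32-bit value by 32 is
undefined in C++):

  #include <cstdint>

  uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
    const uint32_t BW = 32;
    uint32_t Shamt = Z % BW;                     // URem for non-constant Z
    uint32_t Hi = X << Shamt;                    // Shl
    uint32_t Lo = Shamt ? Y >> (BW - Shamt) : 0; // LShr, guarded
    return Hi | Lo;                              // Or
  }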
+ // TODO: Handle the remaining intrinsics with scalable vector types.
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+ // Assume that we need to scalarize this intrinsic.
+ SmallVector<Type *, 4> Types;
+ for (const Value *Op : Args) {
+ Type *OpTy = Op->getType();
+ assert(VF.isScalar() || !OpTy->isVectorTy());
+ Types.push_back(VF.isScalar()
+ ? OpTy
+ : FixedVectorType::get(OpTy, VF.getKnownMinValue()));
+ }
+
+ if (VF.isVector() && !RetTy->isVoidTy())
+ RetTy = FixedVectorType::get(RetTy, VF.getKnownMinValue());
+
+ // Compute the scalarization overhead based on Args for a vector
+ // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
+ // CostModel will pass a vector RetTy and VF is 1.
+ unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
+ if (RetVF.isVector() || VF.isVector()) {
+ ScalarizationCost = 0;
+ if (!RetTy->isVoidTy())
+ ScalarizationCost +=
+ getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
+ ScalarizationCost +=
+ getOperandsScalarizationOverhead(Args, VF.getKnownMinValue());
+ }
+
+ IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, ScalarizationCost, I);
+ return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
}
/// Get intrinsic cost based on argument types.
/// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
/// cost of scalarizing the arguments and the return value will be computed
/// based on types.
- unsigned getIntrinsicInstrCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
- unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
- auto *ConcreteTTI = static_cast<T *>(this);
+ unsigned getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ Intrinsic::ID IID = ICA.getID();
+ Type *RetTy = ICA.getReturnType();
+ const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
+ FastMathFlags FMF = ICA.getFlags();
+ unsigned ScalarizationCostPassed = ICA.getScalarizationCost();
+ bool SkipScalarizationCost = ICA.skipScalarizationCost();
+ VectorType *VecOpTy = nullptr;
+ if (!Tys.empty()) {
+ // The vector reduction operand is operand 0 except for fadd/fmul.
+ // Their operand 0 is a scalar start value, so the vector op is operand 1.
+ unsigned VecTyIndex = 0;
+ if (IID == Intrinsic::vector_reduce_fadd ||
+ IID == Intrinsic::vector_reduce_fmul)
+ VecTyIndex = 1;
+ assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
+ VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
+ }
+
+ // Library call cost. Make it expensive except when optimizing for size.
+ unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
SmallVector<unsigned, 2> ISDs;
- unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
default: {
// Assume that we need to scalarize this intrinsic.
unsigned ScalarizationCost = ScalarizationCostPassed;
unsigned ScalarCalls = 1;
Type *ScalarRetTy = RetTy;
- if (RetTy->isVectorTy()) {
- if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
+ if (!SkipScalarizationCost)
+ ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
+ ScalarCalls = std::max(ScalarCalls,
+ cast<FixedVectorType>(RetVTy)->getNumElements());
ScalarRetTy = RetTy->getScalarType();
}
SmallVector<Type *, 4> ScalarTys;
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
Type *Ty = Tys[i];
- if (Ty->isVectorTy()) {
- if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
- ScalarizationCost += getScalarizationOverhead(Ty, false, true);
- ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
+ if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+ if (!SkipScalarizationCost)
+ ScalarizationCost += getScalarizationOverhead(VTy, false, true);
+ ScalarCalls = std::max(ScalarCalls,
+ cast<FixedVectorType>(VTy)->getNumElements());
Ty = Ty->getScalarType();
}
ScalarTys.push_back(Ty);
@@ -1160,8 +1435,9 @@
if (ScalarCalls == 1)
return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
+ IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
unsigned ScalarCost =
- ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
+ thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
return ScalarCalls * ScalarCost + ScalarizationCost;
}
@@ -1199,13 +1475,15 @@
break;
case Intrinsic::minnum:
ISDs.push_back(ISD::FMINNUM);
- if (FMF.noNaNs())
- ISDs.push_back(ISD::FMINIMUM);
break;
case Intrinsic::maxnum:
ISDs.push_back(ISD::FMAXNUM);
- if (FMF.noNaNs())
- ISDs.push_back(ISD::FMAXIMUM);
+ break;
+ case Intrinsic::minimum:
+ ISDs.push_back(ISD::FMINIMUM);
+ break;
+ case Intrinsic::maximum:
+ ISDs.push_back(ISD::FMAXIMUM);
break;
case Intrinsic::copysign:
ISDs.push_back(ISD::FCOPYSIGN);
@@ -1228,6 +1506,9 @@
case Intrinsic::round:
ISDs.push_back(ISD::FROUND);
break;
+ case Intrinsic::roundeven:
+ ISDs.push_back(ISD::FROUNDEVEN);
+ break;
case Intrinsic::pow:
ISDs.push_back(ISD::FPOW);
break;
@@ -1237,58 +1518,96 @@
case Intrinsic::fmuladd:
ISDs.push_back(ISD::FMA);
break;
+ case Intrinsic::experimental_constrained_fmuladd:
+ ISDs.push_back(ISD::STRICT_FMA);
+ break;
// FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::sideeffect:
+ case Intrinsic::pseudoprobe:
return 0;
- case Intrinsic::masked_store:
- return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
- 0);
- case Intrinsic::masked_load:
- return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
- case Intrinsic::experimental_vector_reduce_add:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
- /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_mul:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
- /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_and:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
- /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_or:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
- /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_xor:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
- /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- return ConcreteTTI->getArithmeticReductionCost(
- Instruction::FAdd, Tys[0],
- /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
- // reductions.
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- return ConcreteTTI->getArithmeticReductionCost(
- Instruction::FMul, Tys[0],
- /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
- // reductions.
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- return ConcreteTTI->getMinMaxReductionCost(
- Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
- /*IsSigned=*/true);
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- return ConcreteTTI->getMinMaxReductionCost(
- Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
- /*IsSigned=*/false);
+ case Intrinsic::masked_store: {
+ Type *Ty = Tys[0];
+ Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
+ return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
+ CostKind);
+ }
+ case Intrinsic::masked_load: {
+ Type *Ty = RetTy;
+ Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
+ return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
+ CostKind);
+ }
+ case Intrinsic::vector_reduce_add:
+ return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
+ /*IsPairwiseForm=*/false,
+ CostKind);
+ case Intrinsic::vector_reduce_mul:
+ return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
+ /*IsPairwiseForm=*/false,
+ CostKind);
+ case Intrinsic::vector_reduce_and:
+ return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
+ /*IsPairwiseForm=*/false,
+ CostKind);
+ case Intrinsic::vector_reduce_or:
+ return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
+ /*IsPairwiseForm=*/false,
+ CostKind);
+ case Intrinsic::vector_reduce_xor:
+ return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
+ /*IsPairwiseForm=*/false,
+ CostKind);
+ case Intrinsic::vector_reduce_fadd:
+ // FIXME: Add new flag for cost of strict reductions.
+ return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
+ /*IsPairwiseForm=*/false,
+ CostKind);
+ case Intrinsic::vector_reduce_fmul:
+ // FIXME: Add new flag for cost of strict reductions.
+ return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
+ /*IsPairwiseForm=*/false,
+ CostKind);
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ return thisT()->getMinMaxReductionCost(
+ VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+ /*IsPairwiseForm=*/false,
+ /*IsUnsigned=*/false, CostKind);
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ return thisT()->getMinMaxReductionCost(
+ VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+ /*IsPairwiseForm=*/false,
+ /*IsUnsigned=*/true, CostKind);
+ case Intrinsic::abs:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin: {
+ // abs(X) = select(icmp(X,0),X,sub(0,X))
+ // minmax(X,Y) = select(icmp(X,Y),X,Y)
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ unsigned Cost = 0;
+ // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ // TODO: Should we add an OperandValueProperties::OP_Zero property?
+ if (IID == Intrinsic::abs)
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
+ return Cost;
+ }
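The abs(X) = select(icmp(X,0),X,sub(0,X)) expansion from the comment, as
standalone C++ (the subtract routes through unsigned math to keep INT32_MIN
well behaved, mirroring llvm.abs without the int-min-poison flag):

  #include <cstdint>

  int32_t absExpand(int32_t X) {
    bool IsNeg = X < 0;                         // icmp
    int32_t NegX = (int32_t)(0u - (uint32_t)X); // sub(0, X)
    return IsNeg ? NegX : X;                    // select
  }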
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat: {
- Type *CondTy = Type::getInt1Ty(RetTy->getContext());
- if (RetVF > 1)
- CondTy = VectorType::get(CondTy, RetVF);
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
Type *OpTy = StructType::create({RetTy, CondTy});
Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
@@ -1298,19 +1617,20 @@
// SatMax -> Overflow && SumDiff < 0
// SatMin -> Overflow && SumDiff >= 0
unsigned Cost = 0;
- Cost += ConcreteTTI->getIntrinsicInstrCost(
- OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
- Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
- CondTy, nullptr);
- Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
- CondTy, nullptr);
+ IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
+ ScalarizationCostPassed);
+ Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += 2 * thisT()->getCmpSelInstrCost(
+ BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
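The saturating-add shape priced above, sketched with the Clang/GCC checked
arithmetic builtin standing in for the overflow intrinsic:

  #include <cstdint>
  #include <limits>

  int32_t saddSat(int32_t A, int32_t B) {
    int32_t Sum;
    bool Overflow = __builtin_add_overflow(A, B, &Sum);
    // SatMax -> Overflow && Sum < 0, SatMin -> Overflow && Sum >= 0.
    int32_t Sat = Sum < 0 ? std::numeric_limits<int32_t>::max()
                          : std::numeric_limits<int32_t>::min();
    return Overflow ? Sat : Sum; // the two selects
  }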
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {
- Type *CondTy = Type::getInt1Ty(RetTy->getContext());
- if (RetVF > 1)
- CondTy = VectorType::get(CondTy, RetVF);
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
Type *OpTy = StructType::create({RetTy, CondTy});
Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
@@ -1318,34 +1638,36 @@
: Intrinsic::usub_with_overflow;
unsigned Cost = 0;
- Cost += ConcreteTTI->getIntrinsicInstrCost(
- OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
- Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
- CondTy, nullptr);
+ IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
+ ScalarizationCostPassed);
+ Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
case Intrinsic::smul_fix:
case Intrinsic::umul_fix: {
unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
- Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
- if (RetVF > 1)
- ExtTy = VectorType::get(ExtTy, RetVF);
+ Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
unsigned ExtOp =
IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
unsigned Cost = 0;
- Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
- Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+ Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
Cost +=
- 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
- Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
- TTI::OK_AnyValue,
- TTI::OK_UniformConstantValue);
- Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
- TTI::OK_AnyValue,
- TTI::OK_UniformConstantValue);
- Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
+ thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+ Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+ CCH, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
+ CostKind, TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+ Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
+ TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+ Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
return Cost;
}
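Up to the rounding of the dropped fractional bits, smul.fix on i32 with scale
S behaves like the folded reference below; the cost model above prices the
unfolded form (two sign-extends, a wide multiply, two truncates, and the
lshr/shl/or that reassemble the halves when the doubled type is illegal):

  #include <cstdint>

  int32_t smulFix(int32_t A, int32_t B, unsigned S) {
    int64_t Wide = (int64_t)A * (int64_t)B; // sext, sext, mul
    return (int32_t)(Wide >> S);            // shifts + truncation, folded
  }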
case Intrinsic::sadd_with_overflow:
@@ -1365,13 +1687,15 @@
// Sub:
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
unsigned Cost = 0;
- Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
- Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
- OverflowTy, nullptr);
- Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
- BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
- Cost +=
- ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
+ Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
+ Cost += 3 * thisT()->getCmpSelInstrCost(
+ Instruction::ICmp, SumTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += 2 * thisT()->getCmpSelInstrCost(
+ Instruction::Select, OverflowTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
+ CostKind);
return Cost;
}
case Intrinsic::uadd_with_overflow:
@@ -1383,9 +1707,10 @@
: BinaryOperator::Sub;
unsigned Cost = 0;
- Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
- Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
- OverflowTy, nullptr);
+ Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
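The unsigned case needs only one compare, because an unsigned add overflowed
exactly when the sum wrapped below either operand. A minimal sketch:

  #include <cstdint>

  bool uaddWithOverflow(uint32_t A, uint32_t B, uint32_t &Sum) {
    Sum = A + B;    // add
    return Sum < A; // icmp
  }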
case Intrinsic::smul_with_overflow:
@@ -1393,29 +1718,30 @@
Type *MulTy = RetTy->getContainedType(0);
Type *OverflowTy = RetTy->getContainedType(1);
unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
- Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
- if (MulTy->isVectorTy())
- ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
+ Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
unsigned ExtOp =
IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
unsigned Cost = 0;
- Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
- Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+ Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
Cost +=
- 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
- Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
- TTI::OK_AnyValue,
- TTI::OK_UniformConstantValue);
+ thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+ Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+ CCH, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
+ CostKind, TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
if (IID == Intrinsic::smul_with_overflow)
- Cost += ConcreteTTI->getArithmeticInstrCost(
- Instruction::AShr, MulTy, TTI::OK_AnyValue,
- TTI::OK_UniformConstantValue);
+ Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
+ CostKind, TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
- Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
- OverflowTy, nullptr);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
case Intrinsic::ctpop:
@@ -1425,6 +1751,12 @@
SingleCallCost = TargetTransformInfo::TCC_Expensive;
break;
// FIXME: ctlz, cttz, ...
+ case Intrinsic::bswap:
+ ISDs.push_back(ISD::BSWAP);
+ break;
+ case Intrinsic::bitreverse:
+ ISDs.push_back(ISD::BITREVERSE);
+ break;
}
const TargetLoweringBase *TLI = getTLI();
@@ -1454,7 +1786,7 @@
}
}
- auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
+ auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
if (MinLegalCostI != LegalCost.end())
return *MinLegalCostI;
@@ -1466,18 +1798,27 @@
// If we can't lower fmuladd into an FMA estimate the cost as a floating
// point mul followed by an add.
if (IID == Intrinsic::fmuladd)
- return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
- ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+ return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
+ CostKind) +
+ thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
+ CostKind);
+ if (IID == Intrinsic::experimental_constrained_fmuladd) {
+ IntrinsicCostAttributes FMulAttrs(
+ Intrinsic::experimental_constrained_fmul, RetTy, Tys);
+ IntrinsicCostAttributes FAddAttrs(
+ Intrinsic::experimental_constrained_fadd, RetTy, Tys);
+ return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
+ thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
+ }
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
- if (RetTy->isVectorTy()) {
- unsigned ScalarizationCost =
- ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
- ? ScalarizationCostPassed
- : getScalarizationOverhead(RetTy, true, false));
- unsigned ScalarCalls = RetTy->getVectorNumElements();
+ if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
+ unsigned ScalarizationCost = SkipScalarizationCost ?
+ ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false);
+
+ unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
SmallVector<Type *, 4> ScalarTys;
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
Type *Ty = Tys[i];
@@ -1485,16 +1826,16 @@
Ty = Ty->getScalarType();
ScalarTys.push_back(Ty);
}
- unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
- IID, RetTy->getScalarType(), ScalarTys, FMF);
+ IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
+ unsigned ScalarCost = thisT()->getIntrinsicInstrCost(Attrs, CostKind);
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
- if (Tys[i]->isVectorTy()) {
- if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
- ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
+ if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
+ if (!ICA.skipScalarizationCost())
+ ScalarizationCost += getScalarizationOverhead(VTy, false, true);
+ ScalarCalls = std::max(ScalarCalls,
+ cast<FixedVectorType>(VTy)->getNumElements());
}
}
-
return ScalarCalls * ScalarCost + ScalarizationCost;
}
@@ -1513,7 +1854,8 @@
/// \param RetTy Return value type.
/// \param Tys Argument types.
/// \returns The cost of a Call instruction.
- unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
+ unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
return 10;
}
@@ -1562,28 +1904,27 @@
///
/// The cost model should take into account that the actual length of the
/// vector is reduced on each iteration.
- unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) {
- assert(Ty->isVectorTy() && "Expect a vector type");
- Type *ScalarTy = Ty->getVectorElementType();
- unsigned NumVecElts = Ty->getVectorNumElements();
+ unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ bool IsPairwise,
+ TTI::TargetCostKind CostKind) {
+ Type *ScalarTy = Ty->getElementType();
+ unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
unsigned ArithCost = 0;
unsigned ShuffleCost = 0;
- auto *ConcreteTTI = static_cast<T *>(this);
std::pair<unsigned, MVT> LT =
- ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+ thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
unsigned LongVectorCount = 0;
unsigned MVTLen =
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
- Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
+ VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
- ShuffleCost += (IsPairwise + 1) *
- ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, SubTy);
- ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
+ ShuffleCost +=
+ (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+ Ty, NumVecElts, SubTy);
+ ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
Ty = SubTy;
++LongVectorCount;
}
@@ -1602,22 +1943,20 @@
if (IsPairwise && NumReduxLevels >= 1)
NumShuffles += NumReduxLevels - 1;
ShuffleCost += NumShuffles *
- ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
- 0, Ty);
- ArithCost += NumReduxLevels *
- ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
+ thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
+ ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
return ShuffleCost + ArithCost +
- ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
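The halving loop above models a Log2_32(N)-level shuffle-and-add tree. Its
scalar equivalent for eight elements, as a standalone sketch:

  #include <cstddef>

  float reduceAdd8(float V[8]) {
    for (std::size_t Width = 4; Width >= 1; Width /= 2) // three levels
      for (std::size_t I = 0; I < Width; ++I)
        V[I] += V[I + Width]; // one shuffle + one vector add per level
    return V[0];              // the final extractelement
  }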
/// Try to calculate op costs for min/max reduction operations.
/// \param CondTy Conditional type for the Select instruction.
- unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
- bool) {
- assert(Ty->isVectorTy() && "Expect a vector type");
- Type *ScalarTy = Ty->getVectorElementType();
- Type *ScalarCondTy = CondTy->getVectorElementType();
- unsigned NumVecElts = Ty->getVectorNumElements();
+ unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwise, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) {
+ Type *ScalarTy = Ty->getElementType();
+ Type *ScalarCondTy = CondTy->getElementType();
+ unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
unsigned CmpOpcode;
if (Ty->isFPOrFPVectorTy()) {
@@ -1629,25 +1968,25 @@
}
unsigned MinMaxCost = 0;
unsigned ShuffleCost = 0;
- auto *ConcreteTTI = static_cast<T *>(this);
std::pair<unsigned, MVT> LT =
- ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+ thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
unsigned LongVectorCount = 0;
unsigned MVTLen =
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
- Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
- CondTy = VectorType::get(ScalarCondTy, NumVecElts);
+ auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
+ CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
- ShuffleCost += (IsPairwise + 1) *
- ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, SubTy);
+ ShuffleCost +=
+ (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+ Ty, NumVecElts, SubTy);
MinMaxCost +=
- ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
- ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
- nullptr);
+ thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind) +
+ thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
Ty = SubTy;
++LongVectorCount;
}
@@ -1666,17 +2005,17 @@
if (IsPairwise && NumReduxLevels >= 1)
NumShuffles += NumReduxLevels - 1;
ShuffleCost += NumShuffles *
- ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
- 0, Ty);
+ thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
MinMaxCost +=
NumReduxLevels *
- (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
- ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
- nullptr));
+ (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind) +
+ thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind));
// The last min/max should be in vector registers and we counted it above.
// So we just need a single extractelement.
return ShuffleCost + MinMaxCost +
- ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
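The min/max variant swaps each add for a cmp + select pair, which is why the
loop above charges getCmpSelInstrCost twice per level. A four-element sketch:

  int reduceMin4(const int V[4]) {
    int A = V[0] < V[2] ? V[0] : V[2]; // level 1: icmp + select
    int B = V[1] < V[3] ? V[1] : V[3]; // level 1, same shuffle
    return A < B ? A : B;              // level 2: icmp + select
  }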
unsigned getVectorSplitCost() { return 1; }
diff --git a/linux-x64/clang/include/llvm/CodeGen/CalcSpillWeights.h b/linux-x64/clang/include/llvm/CodeGen/CalcSpillWeights.h
index 9b8b732..78dae81 100644
--- a/linux-x64/clang/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/linux-x64/clang/include/llvm/CodeGen/CalcSpillWeights.h
@@ -44,64 +44,60 @@
/// Calculate auxiliary information for a virtual register such as its
/// spill weight and allocation hint.
class VirtRegAuxInfo {
- public:
- using NormalizingFn = float (*)(float, unsigned, unsigned);
-
- private:
MachineFunction &MF;
LiveIntervals &LIS;
- VirtRegMap *VRM;
+ const VirtRegMap &VRM;
const MachineLoopInfo &Loops;
const MachineBlockFrequencyInfo &MBFI;
- DenseMap<unsigned, float> Hint;
- NormalizingFn normalize;
public:
- VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis,
- VirtRegMap *vrm, const MachineLoopInfo &loops,
- const MachineBlockFrequencyInfo &mbfi,
- NormalizingFn norm = normalizeSpillWeight)
- : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {}
+ VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS,
+ const VirtRegMap &VRM, const MachineLoopInfo &Loops,
+ const MachineBlockFrequencyInfo &MBFI)
+ : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {}
+
+ virtual ~VirtRegAuxInfo() = default;
/// (Re)compute LI's spill weight and allocation hint.
- void calculateSpillWeightAndHint(LiveInterval &li);
+ void calculateSpillWeightAndHint(LiveInterval &LI);
- /// Compute future expected spill weight of a split artifact of li
+ /// Compute future expected spill weight of a split artifact of LI
/// that will span between start and end slot indexes.
- /// \param li The live interval to be split.
- /// \param start The expected begining of the split artifact. Instructions
+ /// \param LI The live interval to be split.
+ /// \param Start The expected beginning of the split artifact. Instructions
/// before start will not affect the weight.
- /// \param end The expected end of the split artifact. Instructions
+ /// \param End The expected end of the split artifact. Instructions
/// after end will not affect the weight.
/// \return The expected spill weight of the split artifact. Returns
- /// negative weight for unspillable li.
- float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end);
+ /// negative weight for unspillable LI.
+ float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End);
+ /// Compute spill weights and allocation hints for all virtual register
+ /// live intervals.
+ void calculateSpillWeightsAndHints();
+
+ protected:
/// Helper function for weight calculations.
- /// (Re)compute li's spill weight and allocation hint, or, for non null
+ /// (Re)compute LI's spill weight and allocation hint, or, for non null
/// start and end - compute future expected spill weight of a split
- /// artifact of li that will span between start and end slot indexes.
- /// \param li The live interval for which to compute the weight.
- /// \param start The expected begining of the split artifact. Instructions
+ /// artifact of LI that will span between start and end slot indexes.
+ /// \param LI The live interval for which to compute the weight.
+ /// \param Start The expected beginning of the split artifact. Instructions
/// before start will not affect the weight. Relevant for
/// weight calculation of future split artifact.
- /// \param end The expected end of the split artifact. Instructions
+ /// \param End The expected end of the split artifact. Instructions
/// after end will not affect the weight. Relevant for
/// weight calculation of future split artifact.
- /// \return The spill weight. Returns negative weight for unspillable li.
- float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr,
- SlotIndex *end = nullptr);
+ /// \return The spill weight. Returns negative weight for unspillable LI.
+ float weightCalcHelper(LiveInterval &LI, SlotIndex *Start = nullptr,
+ SlotIndex *End = nullptr);
+
+ /// Weight normalization function.
+ virtual float normalize(float UseDefFreq, unsigned Size,
+ unsigned NumInstr) {
+ return normalizeSpillWeight(UseDefFreq, Size, NumInstr);
+ }
};
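A subclass can now tune the weight by overriding normalize(). What such an
override might look like, with an illustrative density-style formula (use/def
frequency over interval length; the clamp constant is an assumption, not
necessarily the in-tree normalizeSpillWeight):

  #include <algorithm>

  float exampleNormalize(float UseDefFreq, unsigned Size, unsigned NumInstr) {
    // Clamp short intervals so their weight tracks use count rather than
    // accidental SlotIndex gaps.
    return UseDefFreq / std::max(Size, NumInstr * 25u);
  }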
-
- /// Compute spill weights and allocation hints for all virtual register
- /// live intervals.
- void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF,
- VirtRegMap *VRM,
- const MachineLoopInfo &MLI,
- const MachineBlockFrequencyInfo &MBFI,
- VirtRegAuxInfo::NormalizingFn norm =
- normalizeSpillWeight);
-
} // end namespace llvm
#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/CallingConvLower.h b/linux-x64/clang/include/llvm/CodeGen/CallingConvLower.h
index aa339e1..2fe4e37 100644
--- a/linux-x64/clang/include/llvm/CodeGen/CallingConvLower.h
+++ b/linux-x64/clang/include/llvm/CodeGen/CallingConvLower.h
@@ -16,16 +16,17 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Alignment.h"
namespace llvm {
class CCState;
+class MachineFunction;
class MVT;
-class TargetMachine;
class TargetRegisterInfo;
/// CCValAssign - Represent assignment of one arg/retval to a location.
@@ -43,6 +44,7 @@
AExtUpper, // The value is in the upper bits of the location and should be
// extended with undefined upper bits when retrieved.
BCvt, // The value is bit-converted in the location.
+ Trunc, // The value is truncated in the location.
VExt, // The value is vector-widened in the location.
// FIXME: Not implemented yet. Code that uses AExt to mean
// vector-widen should be fixed to use VExt instead.
@@ -163,9 +165,9 @@
/// Describes a register that needs to be forwarded from the prologue to a
/// musttail call.
struct ForwardedRegister {
- ForwardedRegister(unsigned VReg, MCPhysReg PReg, MVT VT)
+ ForwardedRegister(Register VReg, MCPhysReg PReg, MVT VT)
: VReg(VReg), PReg(PReg), VT(VT) {}
- unsigned VReg;
+ Register VReg;
MCPhysReg PReg;
MVT VT;
};
@@ -197,7 +199,7 @@
LLVMContext &Context;
unsigned StackOffset;
- unsigned MaxStackArgAlign;
+ Align MaxStackArgAlign;
SmallVector<uint32_t, 16> UsedRegs;
SmallVector<CCValAssign, 4> PendingLocs;
SmallVector<ISD::ArgFlagsTy, 4> PendingArgFlags;
@@ -220,9 +222,7 @@
// ByValRegs[1] describes how "%t" is stored (Begin == r3, End == r4).
//
// In case of 8-byte stack alignment,
- // ByValRegs may also contain information about wasted registers.
// in the function shown above, r3 would be wasted according to AAPCS rules.
- // And in that case ByValRegs[1].Waste would be "true".
// ByValRegs vector size still would be 2,
// while "%t" goes to the stack: it wouldn't be described in ByValRegs.
//
@@ -232,19 +232,13 @@
// 3. Argument analysis (LowerFormalArguments, for example). After
// a byval argument is analyzed, InRegsParamsProcessed is increased.
struct ByValInfo {
- ByValInfo(unsigned B, unsigned E, bool IsWaste = false) :
- Begin(B), End(E), Waste(IsWaste) {}
+ ByValInfo(unsigned B, unsigned E) : Begin(B), End(E) {}
+
// First register allocated for current parameter.
unsigned Begin;
// First after last register allocated for current parameter.
unsigned End;
-
- // Means that current range of registers doesn't belong to any
- // parameters. It was wasted due to stack alignment rules.
- // For more information see:
- // AAPCS, 5.5 Parameter Passing, Stage C, C.3.
- bool Waste;
};
SmallVector<ByValInfo, 4 > ByValRegs;
@@ -280,8 +274,8 @@
/// isAllocated - Return true if the specified register (or an alias) is
/// allocated.
- bool isAllocated(unsigned Reg) const {
- return UsedRegs[Reg/32] & (1 << (Reg&31));
+ bool isAllocated(MCRegister Reg) const {
+ return UsedRegs[Reg / 32] & (1 << (Reg & 31));
}
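The bit set behind isAllocated()/MarkAllocated(), as a standalone sketch with
plain unsigned register numbers in place of MCRegister:

  #include <cstdint>
  #include <vector>

  struct UsedRegSet {
    std::vector<uint32_t> Words; // one bit per register, 32 per word
    explicit UsedRegSet(unsigned NumRegs) : Words((NumRegs + 31) / 32, 0) {}
    bool isAllocated(unsigned Reg) const {
      return Words[Reg / 32] & (1u << (Reg & 31));
    }
    void markAllocated(unsigned Reg) { Words[Reg / 32] |= 1u << (Reg & 31); }
  };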
/// AnalyzeFormalArguments - Analyze an array of argument values,
@@ -331,7 +325,7 @@
/// A shadow allocated register is a register that was allocated
/// but wasn't added to the location list (Locs).
/// \returns true if the register was allocated as shadow or false otherwise.
- bool IsShadowAllocatedReg(unsigned Reg) const;
+ bool IsShadowAllocatedReg(MCRegister Reg) const;
/// AnalyzeCallResult - Same as above except it's specialized for calls which
/// produce a single value.
@@ -346,18 +340,25 @@
return Regs.size();
}
+ void DeallocateReg(MCPhysReg Reg) {
+ assert(isAllocated(Reg) && "Trying to deallocate an unallocated register");
+ MarkUnallocated(Reg);
+ }
+
/// AllocateReg - Attempt to allocate one register. If it is not available,
/// return zero. Otherwise, return the register, marking it and any aliases
/// as allocated.
- unsigned AllocateReg(unsigned Reg) {
- if (isAllocated(Reg)) return 0;
+ MCRegister AllocateReg(MCPhysReg Reg) {
+ if (isAllocated(Reg))
+ return MCRegister();
MarkAllocated(Reg);
return Reg;
}
/// Version of AllocateReg with extra register to be shadowed.
- unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
- if (isAllocated(Reg)) return 0;
+ MCRegister AllocateReg(MCPhysReg Reg, MCPhysReg ShadowReg) {
+ if (isAllocated(Reg))
+ return MCRegister();
MarkAllocated(Reg);
MarkAllocated(ShadowReg);
return Reg;
@@ -366,13 +367,13 @@
/// AllocateReg - Attempt to allocate one of the specified registers. If none
/// are available, return zero. Otherwise, return the first one available,
/// marking it and any aliases as allocated.
- unsigned AllocateReg(ArrayRef<MCPhysReg> Regs) {
+ MCPhysReg AllocateReg(ArrayRef<MCPhysReg> Regs) {
unsigned FirstUnalloc = getFirstUnallocated(Regs);
if (FirstUnalloc == Regs.size())
- return 0; // Didn't find the reg.
+ return MCRegister(); // Didn't find the reg.
// Mark the register and any aliases as allocated.
- unsigned Reg = Regs[FirstUnalloc];
+ MCPhysReg Reg = Regs[FirstUnalloc];
MarkAllocated(Reg);
return Reg;
}
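+
+  // A usage sketch (hypothetical register names): given a candidate list such
+  // as { MyTarget::R0, MyTarget::R1 }, the first free register is returned
+  // and marked allocated, or an invalid MCRegister if all are taken:
+  //
+  //   static const MCPhysReg GPRs[] = {MyTarget::R0, MyTarget::R1};
+  //   if (MCRegister R = State.AllocateReg(GPRs))
+  //     ... assign the argument to R ...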
@@ -380,7 +381,7 @@
/// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
/// registers. If this is not possible, return zero. Otherwise, return the first
/// register of the block that was allocated, marking the entire block as allocated.
- unsigned AllocateRegBlock(ArrayRef<MCPhysReg> Regs, unsigned RegsRequired) {
+ MCPhysReg AllocateRegBlock(ArrayRef<MCPhysReg> Regs, unsigned RegsRequired) {
if (RegsRequired > Regs.size())
return 0;
@@ -407,13 +408,13 @@
}
/// Version of AllocateReg with list of registers to be shadowed.
- unsigned AllocateReg(ArrayRef<MCPhysReg> Regs, const MCPhysReg *ShadowRegs) {
+ MCRegister AllocateReg(ArrayRef<MCPhysReg> Regs, const MCPhysReg *ShadowRegs) {
unsigned FirstUnalloc = getFirstUnallocated(Regs);
if (FirstUnalloc == Regs.size())
- return 0; // Didn't find the reg.
+ return MCRegister(); // Didn't find the reg.
// Mark the register and any aliases as allocated.
- unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+ MCRegister Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
MarkAllocated(Reg);
MarkAllocated(ShadowReg);
return Reg;
@@ -421,42 +422,48 @@
/// AllocateStack - Allocate a chunk of stack space with the specified size
/// and alignment.
- unsigned AllocateStack(unsigned Size, unsigned Align) {
- assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
- StackOffset = alignTo(StackOffset, Align);
+ unsigned AllocateStack(unsigned Size, Align Alignment) {
+ StackOffset = alignTo(StackOffset, Alignment);
unsigned Result = StackOffset;
StackOffset += Size;
- MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
- ensureMaxAlignment(Align);
+ MaxStackArgAlign = std::max(Alignment, MaxStackArgAlign);
+ ensureMaxAlignment(Alignment);
return Result;
}
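+
+  // Worked example: with StackOffset == 12, AllocateStack(8, Align(8)) first
+  // rounds StackOffset up to 16, returns 16, and leaves StackOffset == 24.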
- void ensureMaxAlignment(unsigned Align) {
- if (!AnalyzingMustTailForwardedRegs)
- MF.getFrameInfo().ensureMaxAlignment(Align);
+  // FIXME: Remove this function when the transition to Align is over.
+ LLVM_ATTRIBUTE_DEPRECATED(unsigned AllocateStack(unsigned Size,
+ unsigned Alignment),
+ "Use the version that takes Align instead.") {
+ return AllocateStack(Size, Align(Alignment));
}
+ void ensureMaxAlignment(Align Alignment);
+
/// Version of AllocateStack with extra register to be shadowed.
- unsigned AllocateStack(unsigned Size, unsigned Align, unsigned ShadowReg) {
+ LLVM_ATTRIBUTE_DEPRECATED(unsigned AllocateStack(unsigned Size,
+ unsigned Alignment,
+ unsigned ShadowReg),
+ "Use the version that takes Align instead.") {
MarkAllocated(ShadowReg);
- return AllocateStack(Size, Align);
+ return AllocateStack(Size, Align(Alignment));
}
/// Version of AllocateStack with list of extra registers to be shadowed.
/// Note that, unlike AllocateReg, this shadows ALL of the shadow registers.
- unsigned AllocateStack(unsigned Size, unsigned Align,
+ unsigned AllocateStack(unsigned Size, Align Alignment,
ArrayRef<MCPhysReg> ShadowRegs) {
for (unsigned i = 0; i < ShadowRegs.size(); ++i)
MarkAllocated(ShadowRegs[i]);
- return AllocateStack(Size, Align);
+ return AllocateStack(Size, Alignment);
}
// HandleByVal - Allocate a stack slot large enough to pass an argument by
// value. The size and alignment information of the argument is encoded in its
// parameter attribute.
- void HandleByVal(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+ void HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, int MinSize, Align MinAlign,
+ ISD::ArgFlagsTy ArgFlags);
// Returns count of byval arguments that are to be stored (even partly)
// in registers.
@@ -567,7 +574,9 @@
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
- void MarkAllocated(unsigned Reg);
+ void MarkAllocated(MCPhysReg Reg);
+
+ void MarkUnallocated(MCPhysReg Reg);
};
} // end namespace llvm
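
For orientation, a minimal sketch of a CCAssignFn driving the CCState API
updated above (the function name, register names, and 8-byte stack alignment
are placeholder assumptions, not taken from this diff):

  static bool CC_Sketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                        CCValAssign::LocInfo LocInfo,
                        ISD::ArgFlagsTy ArgFlags, CCState &State) {
    // Hypothetical argument registers, for illustration only.
    static const MCPhysReg ArgRegs[] = {MyTarget::R0, MyTarget::R1};
    if (MCRegister Reg = State.AllocateReg(ArgRegs)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false; // Assigned to a register; the value is handled.
    }
    // Out of registers: take a stack slot via the Align-based overload.
    unsigned Offset = State.AllocateStack(LocVT.getStoreSize(), Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return false; // Assigned to the stack; the value is handled.
  }
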
diff --git a/linux-x64/clang/include/llvm/CodeGen/CodeGenPassBuilder.h b/linux-x64/clang/include/llvm/CodeGen/CodeGenPassBuilder.h
new file mode 100644
index 0000000..893bc6e
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -0,0 +1,1144 @@
+//===- Construction of codegen pass pipelines ------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Interfaces for registering analysis passes, producing common pass manager
+/// configurations, and parsing of pass pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CODEGENPASSBUILDER_H
+#define LLVM_CODEGEN_CODEGENPASSBUILDER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/CGPassBuilderOption.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/ConstantHoisting.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
+#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/LowerInvoke.h"
+#include <cassert>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace llvm {
+
+// FIXME: Dummy definitions of target-independent passes that have not yet
+// been ported to the new pass manager. Once they are, remove these.
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ PreservedAnalyses run(Function &, FunctionAnalysisManager &) { \
+ return PreservedAnalyses::all(); \
+ } \
+ };
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ PreservedAnalyses run(Module &, ModuleAnalysisManager &) { \
+ return PreservedAnalyses::all(); \
+ } \
+ };
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ Error run(Module &, MachineFunctionAnalysisManager &) { \
+ return Error::success(); \
+ } \
+ PreservedAnalyses run(MachineFunction &, \
+ MachineFunctionAnalysisManager &) { \
+ llvm_unreachable("this api is to make new PM api happy"); \
+ } \
+ static AnalysisKey Key; \
+ };
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ PreservedAnalyses run(MachineFunction &, \
+ MachineFunctionAnalysisManager &) { \
+ return PreservedAnalyses::all(); \
+ } \
+ static AnalysisKey Key; \
+ };
+#include "MachinePassRegistry.def"
+
+/// This class provides access to building LLVM's passes.
+///
+/// Its members provide the baseline state available to passes during their
+/// construction. The \c MachinePassRegistry.def file specifies how to construct
+/// all of the built-in passes, and those may reference these members during
+/// construction.
+template <typename DerivedT> class CodeGenPassBuilder {
+public:
+ explicit CodeGenPassBuilder(LLVMTargetMachine &TM, CGPassBuilderOption Opts,
+ PassInstrumentationCallbacks *PIC)
+ : TM(TM), Opt(Opts), PIC(PIC) {
+    // A target could set CGPassBuilderOption::MISchedPostRA to true to get
+    // the effect of substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
+
+    // Targets should override TM.Options.EnableIPRA in their target-specific
+    // LLVMTM ctor. See TargetMachine::setGlobalISel for an example.
+ if (Opt.EnableIPRA)
+ TM.Options.EnableIPRA = *Opt.EnableIPRA;
+
+ if (Opt.EnableGlobalISelAbort)
+ TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort;
+
+ if (!Opt.OptimizeRegAlloc)
+ Opt.OptimizeRegAlloc = getOptLevel() != CodeGenOpt::None;
+ }
+
+ Error buildPipeline(ModulePassManager &MPM, MachineFunctionPassManager &MFPM,
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType) const;
+
+ void registerModuleAnalyses(ModuleAnalysisManager &) const;
+ void registerFunctionAnalyses(FunctionAnalysisManager &) const;
+ void registerMachineFunctionAnalyses(MachineFunctionAnalysisManager &) const;
+ std::pair<StringRef, bool> getPassNameFromLegacyName(StringRef) const;
+
+ void registerAnalyses(MachineFunctionAnalysisManager &MFAM) const {
+ registerModuleAnalyses(*MFAM.MAM);
+ registerFunctionAnalyses(*MFAM.FAM);
+ registerMachineFunctionAnalyses(MFAM);
+ }
+
+ PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const {
+ return PIC;
+ }
+
+protected:
+ template <typename PassT> using has_key_t = decltype(PassT::Key);
+
+ template <typename PassT>
+ using is_module_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<Module &>(), std::declval<ModuleAnalysisManager &>()));
+
+ template <typename PassT>
+ using is_function_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<Function &>(), std::declval<FunctionAnalysisManager &>()));
+
+ // Function object to maintain state while adding codegen IR passes.
+ class AddIRPass {
+ public:
+ AddIRPass(ModulePassManager &MPM, bool DebugPM, bool Check = true)
+ : MPM(MPM), FPM(DebugPM) {
+ if (Check)
+ AddingFunctionPasses = false;
+ }
+ ~AddIRPass() {
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+
+ // Add Function Pass
+ template <typename PassT>
+ std::enable_if_t<is_detected<is_function_pass_t, PassT>::value>
+ operator()(PassT &&Pass) {
+ if (AddingFunctionPasses && !*AddingFunctionPasses)
+ AddingFunctionPasses = true;
+ FPM.addPass(std::forward<PassT>(Pass));
+ }
+
+ // Add Module Pass
+ template <typename PassT>
+ std::enable_if_t<is_detected<is_module_pass_t, PassT>::value &&
+ !is_detected<is_function_pass_t, PassT>::value>
+ operator()(PassT &&Pass) {
+ assert((!AddingFunctionPasses || !*AddingFunctionPasses) &&
+ "could not add module pass after adding function pass");
+ MPM.addPass(std::forward<PassT>(Pass));
+ }
+
+ private:
+ ModulePassManager &MPM;
+ FunctionPassManager FPM;
+    // The codegen IR pipeline consists mostly of function passes, with the
+    // exception of a few loop and module passes. `AddingFunctionPasses` makes
+    // sure that module passes are only added at the beginning of the
+    // pipeline: once we begin adding function passes, we can no longer add
+    // module passes. This special-casing introduces fewer adaptor passes. If
+    // we ever need to add module passes after function passes, the
+    // implementation can be changed to accommodate that.
+ Optional<bool> AddingFunctionPasses;
+ };
+
+ // Function object to maintain state while adding codegen machine passes.
+ class AddMachinePass {
+ public:
+ AddMachinePass(MachineFunctionPassManager &PM) : PM(PM) {}
+
+ template <typename PassT> void operator()(PassT &&Pass) {
+ static_assert(
+ is_detected<has_key_t, PassT>::value,
+ "Machine function pass must define a static member variable `Key`.");
+ for (auto &C : BeforeCallbacks)
+ if (!C(&PassT::Key))
+ return;
+ PM.addPass(std::forward<PassT>(Pass));
+ for (auto &C : AfterCallbacks)
+ C(&PassT::Key);
+ }
+
+ template <typename PassT> void insertPass(AnalysisKey *ID, PassT Pass) {
+ AfterCallbacks.emplace_back(
+ [this, ID, Pass = std::move(Pass)](AnalysisKey *PassID) {
+ if (PassID == ID)
+ this->PM.addPass(std::move(Pass));
+ });
+ }
+
+ void disablePass(AnalysisKey *ID) {
+ BeforeCallbacks.emplace_back(
+ [ID](AnalysisKey *PassID) { return PassID != ID; });
+ }
+
+ MachineFunctionPassManager releasePM() { return std::move(PM); }
+
+ private:
+ MachineFunctionPassManager &PM;
+ SmallVector<llvm::unique_function<bool(AnalysisKey *)>, 4> BeforeCallbacks;
+ SmallVector<llvm::unique_function<void(AnalysisKey *)>, 4> AfterCallbacks;
+ };
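+
+  // Usage sketch (MyTargetPeephole is a hypothetical pass): targets can
+  // splice or drop machine passes by their AnalysisKey, e.g.
+  //
+  //   addPass.insertPass(&MachineSchedulerPass::Key, MyTargetPeephole());
+  //   addPass.disablePass(&DeadMachineInstructionElimPass::Key);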
+
+ LLVMTargetMachine &TM;
+ CGPassBuilderOption Opt;
+ PassInstrumentationCallbacks *PIC;
+
+  /// Targets override these hooks to register target-specific analyses and
+  /// to resolve target-specific pass names.
+ void registerTargetAnalysis(ModuleAnalysisManager &) const {}
+ void registerTargetAnalysis(FunctionAnalysisManager &) const {}
+ void registerTargetAnalysis(MachineFunctionAnalysisManager &) const {}
+ std::pair<StringRef, bool> getTargetPassNameFromLegacyName(StringRef) const {
+ return {"", false};
+ }
+
+ template <typename TMC> TMC &getTM() const { return static_cast<TMC &>(TM); }
+ CodeGenOpt::Level getOptLevel() const { return TM.getOptLevel(); }
+
+ /// Check whether or not GlobalISel should abort on error.
+ /// When this is disabled, GlobalISel will fall back on SDISel instead of
+ /// erroring out.
+ bool isGlobalISelAbortEnabled() const {
+ return TM.Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
+ }
+
+ /// Check whether or not a diagnostic should be emitted when GlobalISel
+ /// uses the fallback path. In other words, it will emit a diagnostic
+ /// when GlobalISel failed and isGlobalISelAbortEnabled is false.
+ bool reportDiagnosticWhenGlobalISelFallback() const {
+ return TM.Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
+ }
+
+ /// addInstSelector - This method should install an instruction selector pass,
+ /// which converts from LLVM code to machine instructions.
+ Error addInstSelector(AddMachinePass &) const {
+ return make_error<StringError>("addInstSelector is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// Add passes that optimize instruction level parallelism for out-of-order
+ /// targets. These passes are run while the machine code is still in SSA
+ /// form, so they can use MachineTraceMetrics to control their heuristics.
+ ///
+ /// All passes added here should preserve the MachineDominatorTree,
+ /// MachineLoopInfo, and MachineTraceMetrics analyses.
+ void addILPOpts(AddMachinePass &) const {}
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before register allocation.
+ void addPreRegAlloc(AddMachinePass &) const {}
+
+ /// addPreRewrite - Add passes to the optimized register allocation pipeline
+ /// after register allocation is complete, but before virtual registers are
+ /// rewritten to physical registers.
+ ///
+ /// These passes must preserve VirtRegMap and LiveIntervals, and when running
+ /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix.
+ /// When these passes run, VirtRegMap contains legal physreg assignments for
+ /// all virtual registers.
+ ///
+  /// Note that if the target overrides addRegAssignmentOptimized, this may
+  /// not be honored. This is also not generally used for the fast variant,
+  /// where the allocation and rewriting are done in one pass.
+ void addPreRewrite(AddMachinePass &) const {}
+
+ /// Add passes to be run immediately after virtual registers are rewritten
+ /// to physical registers.
+ void addPostRewrite(AddMachinePass &) const {}
+
+ /// This method may be implemented by targets that want to run passes after
+ /// register allocation pass pipeline but before prolog-epilog insertion.
+ void addPostRegAlloc(AddMachinePass &) const {}
+
+ /// This method may be implemented by targets that want to run passes after
+ /// prolog-epilog insertion and before the second instruction scheduling pass.
+ void addPreSched2(AddMachinePass &) const {}
+
+ /// This pass may be implemented by targets that want to run passes
+ /// immediately before machine code is emitted.
+ void addPreEmitPass(AddMachinePass &) const {}
+
+ /// Targets may add passes immediately before machine code is emitted in this
+ /// callback. This is called even later than `addPreEmitPass`.
+ // FIXME: Rename `addPreEmitPass` to something more sensible given its actual
+ // position and remove the `2` suffix here as this callback is what
+ // `addPreEmitPass` *should* be but in reality isn't.
+ void addPreEmitPass2(AddMachinePass &) const {}
+
+  /// @{ For GlobalISel
+ ///
+
+ /// addPreISel - This method should add any "last minute" LLVM->LLVM
+ /// passes (which are run just before instruction selector).
+ void addPreISel(AddIRPass &) const {
+ llvm_unreachable("addPreISel is not overridden");
+ }
+
+ /// This method should install an IR translator pass, which converts from
+ /// LLVM code to machine instructions with possibly generic opcodes.
+ Error addIRTranslator(AddMachinePass &) const {
+ return make_error<StringError>("addIRTranslator is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before legalization.
+ void addPreLegalizeMachineIR(AddMachinePass &) const {}
+
+ /// This method should install a legalize pass, which converts the instruction
+ /// sequence into one that can be selected by the target.
+ Error addLegalizeMachineIR(AddMachinePass &) const {
+ return make_error<StringError>("addLegalizeMachineIR is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before the register bank selection.
+ void addPreRegBankSelect(AddMachinePass &) const {}
+
+ /// This method should install a register bank selector pass, which
+ /// assigns register banks to virtual registers without a register
+ /// class or register banks.
+ Error addRegBankSelect(AddMachinePass &) const {
+ return make_error<StringError>("addRegBankSelect is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before the (global) instruction selection.
+ void addPreGlobalInstructionSelect(AddMachinePass &) const {}
+
+ /// This method should install a (global) instruction selector pass, which
+ /// converts possibly generic instructions to fully target-specific
+ /// instructions, thereby constraining all generic virtual registers to
+ /// register classes.
+ Error addGlobalInstructionSelect(AddMachinePass &) const {
+ return make_error<StringError>(
+ "addGlobalInstructionSelect is not overridden",
+ inconvertibleErrorCode());
+ }
+  /// @}
+
+ /// High level function that adds all passes necessary to go from llvm IR
+ /// representation to the MI representation.
+  /// Adds IR-based lowering, target-specific optimization passes, and finally
+  /// the core instruction selection passes.
+ void addISelPasses(AddIRPass &) const;
+
+ /// Add the actual instruction selection passes. This does not include
+ /// preparation passes on IR.
+ Error addCoreISelPasses(AddMachinePass &) const;
+
+ /// Add the complete, standard set of LLVM CodeGen passes.
+ /// Fully developed targets will not generally override this.
+ Error addMachinePasses(AddMachinePass &) const;
+
+ /// Add passes to lower exception handling for the code generator.
+ void addPassesToHandleExceptions(AddIRPass &) const;
+
+ /// Add common target configurable passes that perform LLVM IR to IR
+ /// transforms following machine independent optimization.
+ void addIRPasses(AddIRPass &) const;
+
+ /// Add pass to prepare the LLVM IR for code generation. This should be done
+ /// before exception handling preparation passes.
+ void addCodeGenPrepare(AddIRPass &) const;
+
+ /// Add common passes that perform LLVM IR to IR transforms in preparation for
+ /// instruction selection.
+ void addISelPrepare(AddIRPass &) const;
+
+ /// Methods with trivial inline returns are convenient points in the common
+ /// codegen pass pipeline where targets may insert passes. Methods with
+ /// out-of-line standard implementations are major CodeGen stages called by
+ /// addMachinePasses. Some targets may override major stages when inserting
+  /// passes is insufficient, but maintaining overridden stages is more work.
+ ///
+
+ /// addMachineSSAOptimization - Add standard passes that optimize machine
+ /// instructions in SSA form.
+ void addMachineSSAOptimization(AddMachinePass &) const;
+
+ /// addFastRegAlloc - Add the minimum set of target-independent passes that
+ /// are required for fast register allocation.
+ Error addFastRegAlloc(AddMachinePass &) const;
+
+ /// addOptimizedRegAlloc - Add passes related to register allocation.
+ /// LLVMTargetMachine provides standard regalloc passes for most targets.
+ void addOptimizedRegAlloc(AddMachinePass &) const;
+
+ /// Add passes that optimize machine instructions after register allocation.
+ void addMachineLateOptimization(AddMachinePass &) const;
+
+ /// addGCPasses - Add late codegen passes that analyze code for garbage
+ /// collection. This should return true if GC info should be printed after
+ /// these passes.
+ void addGCPasses(AddMachinePass &) const {}
+
+ /// Add standard basic block placement passes.
+ void addBlockPlacement(AddMachinePass &) const;
+
+ using CreateMCStreamer =
+ std::function<Expected<std::unique_ptr<MCStreamer>>(MCContext &)>;
+ void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const {
+ llvm_unreachable("addAsmPrinter is not overridden");
+ }
+
+ /// Utilities for targets to add passes to the pass manager.
+ ///
+
+ /// createTargetRegisterAllocator - Create the register allocator pass for
+ /// this target at the current optimization level.
+ void addTargetRegisterAllocator(AddMachinePass &, bool Optimized) const;
+
+  /// addMachinePasses helper to create the target-selected or overridden
+ /// regalloc pass.
+ void addRegAllocPass(AddMachinePass &, bool Optimized) const;
+
+  /// Add core register allocator passes which do the actual register
+  /// assignment and rewriting. \returns Error::success() if the passes were
+  /// added, and an error otherwise.
+ Error addRegAssignmentFast(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+
+private:
+ DerivedT &derived() { return static_cast<DerivedT &>(*this); }
+ const DerivedT &derived() const {
+ return static_cast<const DerivedT &>(*this);
+ }
+};
+
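+// A minimal sketch of a concrete builder (hypothetical target; assumes the
+// mandatory selector hooks are the only overrides needed):
+//
+//   class MyCodeGenPassBuilder
+//       : public CodeGenPassBuilder<MyCodeGenPassBuilder> {
+//   public:
+//     using CodeGenPassBuilder<MyCodeGenPassBuilder>::CodeGenPassBuilder;
+//     void addPreISel(AddIRPass &) const {}
+//     Error addInstSelector(AddMachinePass &addPass) const {
+//       addPass(MyISelPass()); // hypothetical selector pass
+//       return Error::success();
+//     }
+//   };
+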
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::buildPipeline(
+ ModulePassManager &MPM, MachineFunctionPassManager &MFPM,
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType) const {
+ AddIRPass addIRPass(MPM, Opt.DebugPM);
+ addISelPasses(addIRPass);
+
+ AddMachinePass addPass(MFPM);
+ if (auto Err = addCoreISelPasses(addPass))
+ return std::move(Err);
+
+ if (auto Err = derived().addMachinePasses(addPass))
+ return std::move(Err);
+
+ derived().addAsmPrinter(
+ addPass, [this, &Out, DwoOut, FileType](MCContext &Ctx) {
+ return this->TM.createMCStreamer(Out, DwoOut, FileType, Ctx);
+ });
+
+ addPass(FreeMachineFunctionPass());
+ return Error::success();
+}
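+
+// An assumed driver sequence (sketch): construct a derived builder, register
+// its analyses, then build and run the pipeline.
+//
+//   ModulePassManager MPM;
+//   MachineFunctionPassManager MFPM;
+//   MyCodeGenPassBuilder Builder(TM, Opts, PIC); // see the sketch above
+//   Builder.registerAnalyses(MFAM);
+//   if (Error Err = Builder.buildPipeline(MPM, MFPM, Out, DwoOut, FileType))
+//     report_fatal_error(toString(std::move(Err)));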
+
+static inline AAManager registerAAAnalyses(CFLAAType UseCFLAA) {
+ AAManager AA;
+
+ // The order in which these are registered determines their priority when
+ // being queried.
+
+ switch (UseCFLAA) {
+ case CFLAAType::Steensgaard:
+ AA.registerFunctionAnalysis<CFLSteensAA>();
+ break;
+ case CFLAAType::Andersen:
+ AA.registerFunctionAnalysis<CFLAndersAA>();
+ break;
+ case CFLAAType::Both:
+ AA.registerFunctionAnalysis<CFLAndersAA>();
+ AA.registerFunctionAnalysis<CFLSteensAA>();
+ break;
+ default:
+ break;
+ }
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ AA.registerFunctionAnalysis<TypeBasedAA>();
+ AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+ AA.registerFunctionAnalysis<BasicAA>();
+
+ return AA;
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerModuleAnalyses(
+ ModuleAnalysisManager &MAM) const {
+#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ MAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(MAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerFunctionAnalyses(
+ FunctionAnalysisManager &FAM) const {
+ FAM.registerPass([this] { return registerAAAnalyses(this->Opt.UseCFLAA); });
+
+#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ FAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(FAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerMachineFunctionAnalyses(
+ MachineFunctionAnalysisManager &MFAM) const {
+#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ MFAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(MFAM);
+}
+
+// FIXME: For the new PM, using the pass name directly on the command line
+// seems preferable.
+// Translate a legacy command-line pass name to its stringified new-PM pass
+// name. Returns the matching pass name and a boolean value indicating whether
+// the pass is a machine pass.
+template <typename Derived>
+std::pair<StringRef, bool>
+CodeGenPassBuilder<Derived>::getPassNameFromLegacyName(StringRef Name) const {
+ std::pair<StringRef, bool> Ret;
+ if (Name.empty())
+ return Ret;
+
+#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#include "llvm/CodeGen/MachinePassRegistry.def"
+
+ if (Ret.first.empty())
+ Ret = derived().getTargetPassNameFromLegacyName(Name);
+
+ if (Ret.first.empty())
+ report_fatal_error(Twine('\"') + Twine(Name) +
+ Twine("\" pass could not be found."));
+
+ return Ret;
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addISelPasses(AddIRPass &addPass) const {
+ if (TM.useEmulatedTLS())
+ addPass(LowerEmuTLSPass());
+
+ addPass(PreISelIntrinsicLoweringPass());
+
+ derived().addIRPasses(addPass);
+ derived().addCodeGenPrepare(addPass);
+ addPassesToHandleExceptions(addPass);
+ derived().addISelPrepare(addPass);
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!Opt.DisableVerify)
+ addPass(VerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableLSR) {
+ addPass(createFunctionToLoopPassAdaptor(
+ LoopStrengthReducePass(), /*UseMemorySSA*/ true, Opt.DebugPM));
+ // FIXME: use -stop-after so we could remove PrintLSR
+ if (Opt.PrintLSR)
+ addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
+ }
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
+ // loads and compares. ExpandMemCmpPass then tries to expand those calls
+ // into optimally-sized loads and compares. The transforms are enabled by a
+ // target lowering hook.
+ if (!Opt.DisableMergeICmps)
+ addPass(MergeICmpsPass());
+ addPass(ExpandMemCmpPass());
+ }
+
+ // Run GC lowering passes for builtin collectors
+ // TODO: add a pass insertion point here
+ addPass(GCLoweringPass());
+ addPass(ShadowStackGCLoweringPass());
+ addPass(LowerConstantIntrinsicsPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ addPass(UnreachableBlockElimPass());
+
+ // Prepare expensive constants for SelectionDAG.
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableConstantHoisting)
+ addPass(ConstantHoistingPass());
+
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisablePartialLibcallInlining)
+ addPass(PartiallyInlineLibCallsPass());
+
+ // Instrument function entry and exit, e.g. with calls to mcount().
+ addPass(EntryExitInstrumenterPass(/*PostInlining=*/true));
+
+  // Add a pass that scalarizes masked memory intrinsics the target does not
+  // support. Each unsupported intrinsic is replaced with a chain of basic
+  // blocks that stores/loads elements one by one when the corresponding mask
+  // bit is set.
+ addPass(ScalarizeMaskedMemIntrinPass());
+
+ // Expand reduction intrinsics into shuffle sequences if the target wants to.
+ addPass(ExpandReductionsPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addPassesToHandleExceptions(
+ AddIRPass &addPass) const {
+ const MCAsmInfo *MCAI = TM.getMCAsmInfo();
+ assert(MCAI && "No MCAsmInfo");
+ switch (MCAI->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit, so the cleanups done here apply
+    // to both. Dwarf EH prepare needs to be run after SjLj prepare; otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(SjLjEHPreparePass());
+ LLVM_FALLTHROUGH;
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::AIX:
+ addPass(DwarfEHPass(getOptLevel()));
+ break;
+ case ExceptionHandling::WinEH:
+ // We support using both GCC-style and MSVC-style exceptions on Windows, so
+ // add both preparation passes. Each pass will only actually run if it
+ // recognizes the personality function.
+ addPass(WinEHPass());
+ addPass(DwarfEHPass(getOptLevel()));
+ break;
+ case ExceptionHandling::Wasm:
+ // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs
+ // on catchpads and cleanuppads because it does not outline them into
+ // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we
+ // should remove PHIs there.
+ addPass(WinEHPass(/*DemoteCatchSwitchPHIOnly=*/false));
+ addPass(WasmEHPass());
+ break;
+ case ExceptionHandling::None:
+ addPass(LowerInvokePass());
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(UnreachableBlockElimPass());
+ break;
+ }
+}
+
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addCodeGenPrepare(AddIRPass &addPass) const {
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableCGP)
+ addPass(CodeGenPreparePass());
+ // TODO: Default ctor'd RewriteSymbolPass is no-op.
+ // addPass(RewriteSymbolPass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addISelPrepare(AddIRPass &addPass) const {
+ derived().addPreISel(addPass);
+
+ // Add both the safe stack and the stack protection passes: each of them will
+ // only protect functions that have corresponding attributes.
+ addPass(SafeStackPass());
+ addPass(StackProtectorPass());
+
+ if (Opt.PrintISelInput)
+ addPass(PrintFunctionPass(dbgs(),
+ "\n\n*** Final LLVM Code input to ISel ***\n"));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!Opt.DisableVerify)
+ addPass(VerifierPass());
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addCoreISelPasses(
+ AddMachinePass &addPass) const {
+ // Enable FastISel with -fast-isel, but allow that to be overridden.
+ TM.setO0WantsFastISel(Opt.EnableFastISelOption.getValueOr(true));
+
+ // Determine an instruction selector.
+ enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+ SelectorType Selector;
+
+ if (Opt.EnableFastISelOption && *Opt.EnableFastISelOption == true)
+ Selector = SelectorType::FastISel;
+ else if ((Opt.EnableGlobalISelOption &&
+ *Opt.EnableGlobalISelOption == true) ||
+ (TM.Options.EnableGlobalISel &&
+ (!Opt.EnableGlobalISelOption ||
+ *Opt.EnableGlobalISelOption == false)))
+ Selector = SelectorType::GlobalISel;
+ else if (TM.getOptLevel() == CodeGenOpt::None && TM.getO0WantsFastISel())
+ Selector = SelectorType::FastISel;
+ else
+ Selector = SelectorType::SelectionDAG;
+
+  // Consistently set TM.Options.EnableFastISel and TM.Options.EnableGlobalISel.
+ if (Selector == SelectorType::FastISel) {
+ TM.setFastISel(true);
+ TM.setGlobalISel(false);
+ } else if (Selector == SelectorType::GlobalISel) {
+ TM.setFastISel(false);
+ TM.setGlobalISel(true);
+ }
+
+ // Add instruction selector passes.
+ if (Selector == SelectorType::GlobalISel) {
+ if (auto Err = derived().addIRTranslator(addPass))
+ return std::move(Err);
+
+ derived().addPreLegalizeMachineIR(addPass);
+
+ if (auto Err = derived().addLegalizeMachineIR(addPass))
+ return std::move(Err);
+
+ // Before running the register bank selector, ask the target if it
+ // wants to run some passes.
+ derived().addPreRegBankSelect(addPass);
+
+ if (auto Err = derived().addRegBankSelect(addPass))
+ return std::move(Err);
+
+ derived().addPreGlobalInstructionSelect(addPass);
+
+ if (auto Err = derived().addGlobalInstructionSelect(addPass))
+ return std::move(Err);
+
+ // Pass to reset the MachineFunction if the ISel failed.
+ addPass(ResetMachineFunctionPass(reportDiagnosticWhenGlobalISelFallback(),
+ isGlobalISelAbortEnabled()));
+
+ // Provide a fallback path when we do not want to abort on
+ // not-yet-supported input.
+ if (!isGlobalISelAbortEnabled())
+ if (auto Err = derived().addInstSelector(addPass))
+ return std::move(Err);
+
+ } else if (auto Err = derived().addInstSelector(addPass))
+ return std::move(Err);
+
+ // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+ // FinalizeISel.
+ addPass(FinalizeISelPass());
+
+ // // Print the instruction selected machine code...
+ // printAndVerify("After Instruction Selection");
+
+ return Error::success();
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any CodeGenPassBuilder<Derived>::addXX routine may be overridden by the
+/// Target. The addPre/Post methods with empty header implementations allow
+/// injecting target-specific fixups just before or after major stages.
+/// Additionally, targets have the flexibility to change pass order within a
+/// stage by overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addMachinePasses(
+ AddMachinePass &addPass) const {
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ derived().addMachineSSAOptimization(addPass);
+ } else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotPass());
+ }
+
+ if (TM.Options.EnableIPRA)
+ addPass(RegUsageInfoPropagationPass());
+
+ // Run pre-ra passes.
+ derived().addPreRegAlloc(addPass);
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (*Opt.OptimizeRegAlloc) {
+ derived().addOptimizedRegAlloc(addPass);
+ } else {
+ if (auto Err = derived().addFastRegAlloc(addPass))
+ return Err;
+ }
+
+ // Run post-ra passes.
+ derived().addPostRegAlloc(addPass);
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(PostRAMachineSinkingPass());
+ addPass(ShrinkWrapPass());
+ }
+
+ addPass(PrologEpilogInserterPass());
+
+ /// Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ derived().addMachineLateOptimization(addPass);
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(ExpandPostRAPseudosPass());
+
+ // Run pre-sched2 passes.
+ derived().addPreSched2(addPass);
+
+ if (Opt.EnableImplicitNullChecks)
+ addPass(ImplicitNullChecksPass());
+
+ // Second pass scheduler.
+ // Let Target optionally insert this pass by itself at some other
+ // point.
+ if (getOptLevel() != CodeGenOpt::None &&
+ !TM.targetSchedulesPostRAScheduling()) {
+ if (Opt.MISchedPostRA)
+ addPass(PostMachineSchedulerPass());
+ else
+ addPass(PostRASchedulerPass());
+ }
+
+ // GC
+ derived().addGCPasses(addPass);
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ derived().addBlockPlacement(addPass);
+
+ // Insert before XRay Instrumentation.
+ addPass(FEntryInserterPass());
+
+ addPass(XRayInstrumentationPass());
+ addPass(PatchableFunctionPass());
+
+ derived().addPreEmitPass(addPass);
+
+ if (TM.Options.EnableIPRA)
+ // Collect register usage information and produce a register mask of
+ // clobbered registers, to be used to optimize call sites.
+ addPass(RegUsageInfoCollectorPass());
+
+ addPass(FuncletLayoutPass());
+
+ addPass(StackMapLivenessPass());
+ addPass(LiveDebugValuesPass());
+
+ if (TM.Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
+ Opt.EnableMachineOutliner != RunOutliner::NeverOutline) {
+ bool RunOnAllFunctions =
+ (Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline);
+ bool AddOutliner = RunOnAllFunctions || TM.Options.SupportsDefaultOutlining;
+ if (AddOutliner)
+ addPass(MachineOutlinerPass(RunOnAllFunctions));
+ }
+
+ // Add passes that directly emit MI after all other MI passes.
+ derived().addPreEmitPass2(addPass);
+
+ return Error::success();
+}
+
+/// Add passes that optimize machine instructions in SSA form.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addMachineSSAOptimization(
+ AddMachinePass &addPass) const {
+ // Pre-ra tail duplication.
+ addPass(EarlyTailDuplicatePass());
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(OptimizePHIsPass());
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(StackColoringPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotPass());
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(DeadMachineInstructionElimPass());
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ derived().addILPOpts(addPass);
+
+ addPass(EarlyMachineLICMPass());
+ addPass(MachineCSEPass());
+
+ addPass(MachineSinkingPass());
+
+ addPass(PeepholeOptimizerPass());
+  // Clean up the dead code that may have been generated by peephole
+ // rewriting.
+ addPass(DeadMachineInstructionElimPass());
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
+///
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addTargetRegisterAllocator(
+ AddMachinePass &addPass, bool Optimized) const {
+ if (Optimized)
+ addPass(RAGreedyPass());
+ else
+ addPass(RAFastPass());
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analysis.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addRegAllocPass(AddMachinePass &addPass,
+ bool Optimized) const {
+ if (Opt.RegAlloc == RegAllocType::Default)
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ derived().addTargetRegisterAllocator(addPass, Optimized);
+ else if (Opt.RegAlloc == RegAllocType::Basic)
+ addPass(RABasicPass());
+ else if (Opt.RegAlloc == RegAllocType::Fast)
+ addPass(RAFastPass());
+ else if (Opt.RegAlloc == RegAllocType::Greedy)
+ addPass(RAGreedyPass());
+ else if (Opt.RegAlloc == RegAllocType::PBQP)
+ addPass(RAPBQPPass());
+ else
+ llvm_unreachable("unknonwn register allocator type");
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Default &&
+ Opt.RegAlloc != RegAllocType::Fast)
+ return make_error<StringError>(
+ "Must use fast (default) register allocator for unoptimized regalloc.",
+ inconvertibleErrorCode());
+
+ addRegAllocPass(addPass, false);
+ return Error::success();
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ // Add the selected register allocation pass.
+ addRegAllocPass(addPass, true);
+
+ // Allow targets to change the register assignments before rewriting.
+ derived().addPreRewrite(addPass);
+
+ // Finally rewrite virtual registers.
+ addPass(VirtRegRewriterPass());
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(StackSlotColoringPass());
+
+ return Error::success();
+}
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addFastRegAlloc(
+ AddMachinePass &addPass) const {
+ addPass(PHIEliminationPass());
+ addPass(TwoAddressInstructionPass());
+ return derived().addRegAssignmentFast(addPass);
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addOptimizedRegAlloc(
+ AddMachinePass &addPass) const {
+ addPass(DetectDeadLanesPass());
+
+ addPass(ProcessImplicitDefsPass());
+
+ // Edge splitting is smarter with machine loop info.
+ addPass(PHIEliminationPass());
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (Opt.EarlyLiveIntervals)
+ addPass(LiveIntervalsPass());
+
+ addPass(TwoAddressInstructionPass());
+ addPass(RegisterCoalescerPass());
+
+  // The machine scheduler may accidentally create disconnected components
+  // when moving subregister definitions around; avoid this by first splitting
+  // them into separate vregs. Splitting can also improve register allocation
+  // quality.
+ addPass(RenameIndependentSubregsPass());
+
+ // PreRA instruction scheduling.
+ addPass(MachineSchedulerPass());
+
+  // addRegAssignmentOptimized returns an Error; only run the post-assignment
+  // passes when it succeeded.
+  if (!errorToBool(derived().addRegAssignmentOptimized(addPass))) {
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
+
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass());
+ }
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
+ AddMachinePass &addPass) const {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ addPass(BranchFolderPass());
+
+ // Tail duplication.
+  // Note that duplicating tails just increases code size and degrades
+  // performance for targets that require structured control flow. It can
+  // also make the CFG irreducible, so we disable it for such targets.
+ if (!TM.requiresStructuredCFG())
+ addPass(TailDuplicatePass());
+
+ // Copy propagation.
+ addPass(MachineCopyPropagationPass());
+}
+
+/// Add standard basic block placement passes.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addBlockPlacement(
+ AddMachinePass &addPass) const {
+ addPass(MachineBlockPlacementPass());
+ // Run a separate pass to collect block placement statistics.
+ if (Opt.EnableBlockPlacementStats)
+ addPass(MachineBlockPlacementStatsPass());
+}
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CODEGENPASSBUILDER_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/CommandFlags.h b/linux-x64/clang/include/llvm/CodeGen/CommandFlags.h
new file mode 100644
index 0000000..e6c64cd
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/CommandFlags.h
@@ -0,0 +1,175 @@
+//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains codegen-specific flags that are shared between different
+// command line tools. The tools "llc" and "opt" both use this file to prevent
+// flag duplication.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/FloatingPointMode.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetOptions.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class Module;
+
+namespace codegen {
+
+std::string getMArch();
+
+std::string getMCPU();
+
+std::vector<std::string> getMAttrs();
+
+Reloc::Model getRelocModel();
+Optional<Reloc::Model> getExplicitRelocModel();
+
+ThreadModel::Model getThreadModel();
+
+CodeModel::Model getCodeModel();
+Optional<CodeModel::Model> getExplicitCodeModel();
+
+llvm::ExceptionHandling getExceptionModel();
+
+CodeGenFileType getFileType();
+Optional<CodeGenFileType> getExplicitFileType();
+
+llvm::FramePointer::FP getFramePointerUsage();
+
+bool getEnableUnsafeFPMath();
+
+bool getEnableNoInfsFPMath();
+
+bool getEnableNoNaNsFPMath();
+
+bool getEnableNoSignedZerosFPMath();
+
+bool getEnableNoTrappingFPMath();
+
+DenormalMode::DenormalModeKind getDenormalFPMath();
+DenormalMode::DenormalModeKind getDenormalFP32Math();
+
+bool getEnableHonorSignDependentRoundingFPMath();
+
+llvm::FloatABI::ABIType getFloatABIForCalls();
+
+llvm::FPOpFusion::FPOpFusionMode getFuseFPOps();
+
+bool getDontPlaceZerosInBSS();
+
+bool getEnableGuaranteedTailCallOpt();
+
+bool getEnableAIXExtendedAltivecABI();
+
+bool getDisableTailCalls();
+
+bool getStackSymbolOrdering();
+
+unsigned getOverrideStackAlignment();
+
+bool getStackRealign();
+
+std::string getTrapFuncName();
+
+bool getUseCtors();
+
+bool getRelaxELFRelocations();
+
+bool getDataSections();
+Optional<bool> getExplicitDataSections();
+
+bool getFunctionSections();
+Optional<bool> getExplicitFunctionSections();
+
+bool getIgnoreXCOFFVisibility();
+
+bool getXCOFFTracebackTable();
+
+std::string getBBSections();
+
+std::string getStackProtectorGuard();
+unsigned getStackProtectorGuardOffset();
+std::string getStackProtectorGuardReg();
+
+unsigned getTLSSize();
+
+bool getEmulatedTLS();
+
+bool getUniqueSectionNames();
+
+bool getUniqueBasicBlockSectionNames();
+
+llvm::EABI getEABIVersion();
+
+llvm::DebuggerKind getDebuggerTuningOpt();
+
+bool getEnableStackSizeSection();
+
+bool getEnableAddrsig();
+
+bool getEmitCallSiteInfo();
+
+bool getEnableMachineFunctionSplitter();
+
+bool getEnableDebugEntryValues();
+
+bool getPseudoProbeForProfiling();
+
+bool getValueTrackingVariableLocations();
+
+bool getForceDwarfFrameSection();
+
+bool getXRayOmitFunctionIndex();
+
+/// Create this object with static storage to register codegen-related command
+/// line options.
+struct RegisterCodeGenFlags {
+ RegisterCodeGenFlags();
+};
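+
+// A usage sketch: a tool such as llc would define one object at file scope so
+// the flags are registered before command-line parsing runs, e.g.
+//
+//   static codegen::RegisterCodeGenFlags CGF;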
+
+llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options);
+
+llvm::StackProtectorGuards
+getStackProtectorGuardMode(llvm::TargetOptions &Options);
+
+/// Common utility function tightly tied to the options listed here.
+/// Initializes a TargetOptions object with CodeGen flags and returns it.
+/// \p TheTriple is used to determine the default values for options that are
+/// not explicitly specified. If those triple-dependent option values do not
+/// matter for your component, a default Triple() can be passed in.
+TargetOptions InitTargetOptionsFromCodeGenFlags(const llvm::Triple &TheTriple);
+
+std::string getCPUStr();
+
+std::string getFeaturesStr();
+
+std::vector<std::string> getFeatureList();
+
+void renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val);
+
+/// Set function attributes of function \p F based on CPU, Features, and command
+/// line flags.
+void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F);
+
+/// Set function attributes of functions in Module M based on CPU,
+/// Features, and command line flags.
+void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M);
+} // namespace codegen
+} // namespace llvm
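
For context, a minimal llc-style consumer of this interface (assumed driver
code, not part of the header; the default Triple() choice follows the comment
on InitTargetOptionsFromCodeGenFlags above):

  #include "llvm/ADT/Triple.h"
  #include "llvm/CodeGen/CommandFlags.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Target/TargetOptions.h"

  using namespace llvm;

  // Static storage registers the shared codegen flags before parsing.
  static codegen::RegisterCodeGenFlags CGF;

  int main(int argc, char **argv) {
    cl::ParseCommandLineOptions(argc, argv);
    // A default Triple() suffices when triple-dependent defaults don't
    // matter for the component.
    TargetOptions Options =
        codegen::InitTargetOptionsFromCodeGenFlags(Triple());
    (void)Options; // Options now reflects -float-abi, -fp-contract, etc.
    return 0;
  }
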
diff --git a/linux-x64/clang/include/llvm/CodeGen/CommandFlags.inc b/linux-x64/clang/include/llvm/CodeGen/CommandFlags.inc
deleted file mode 100644
index cb69e9f..0000000
--- a/linux-x64/clang/include/llvm/CodeGen/CommandFlags.inc
+++ /dev/null
@@ -1,411 +0,0 @@
-//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains codegen-specific flags that are shared between different
-// command line tools. The tools "llc" and "opt" both use this file to prevent
-// flag duplication.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include <string>
-using namespace llvm;
-
-static cl::opt<std::string>
- MArch("march",
- cl::desc("Architecture to generate code for (see --version)"));
-
-static cl::opt<std::string>
- MCPU("mcpu",
- cl::desc("Target a specific cpu type (-mcpu=help for details)"),
- cl::value_desc("cpu-name"), cl::init(""));
-
-static cl::list<std::string>
- MAttrs("mattr", cl::CommaSeparated,
- cl::desc("Target specific attributes (-mattr=help for details)"),
- cl::value_desc("a1,+a2,-a3,..."));
-
-static cl::opt<Reloc::Model> RelocModel(
- "relocation-model", cl::desc("Choose relocation model"),
- cl::values(
- clEnumValN(Reloc::Static, "static", "Non-relocatable code"),
- clEnumValN(Reloc::PIC_, "pic",
- "Fully relocatable, position independent code"),
- clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
- "Relocatable external references, non-relocatable code"),
- clEnumValN(Reloc::ROPI, "ropi",
- "Code and read-only data relocatable, accessed PC-relative"),
- clEnumValN(
- Reloc::RWPI, "rwpi",
- "Read-write data relocatable, accessed relative to static base"),
- clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi",
- "Combination of ropi and rwpi")));
-
-LLVM_ATTRIBUTE_UNUSED static Optional<Reloc::Model> getRelocModel() {
- if (RelocModel.getNumOccurrences()) {
- Reloc::Model R = RelocModel;
- return R;
- }
- return None;
-}
-
-static cl::opt<ThreadModel::Model> TMModel(
- "thread-model", cl::desc("Choose threading model"),
- cl::init(ThreadModel::POSIX),
- cl::values(clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"),
- clEnumValN(ThreadModel::Single, "single",
- "Single thread model")));
-
-static cl::opt<llvm::CodeModel::Model> CMModel(
- "code-model", cl::desc("Choose code model"),
- cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"),
- clEnumValN(CodeModel::Small, "small", "Small code model"),
- clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"),
- clEnumValN(CodeModel::Medium, "medium", "Medium code model"),
- clEnumValN(CodeModel::Large, "large", "Large code model")));
-
-LLVM_ATTRIBUTE_UNUSED static Optional<CodeModel::Model> getCodeModel() {
- if (CMModel.getNumOccurrences()) {
- CodeModel::Model M = CMModel;
- return M;
- }
- return None;
-}
-
-static cl::opt<llvm::ExceptionHandling> ExceptionModel(
- "exception-model", cl::desc("exception model"),
- cl::init(ExceptionHandling::None),
- cl::values(
- clEnumValN(ExceptionHandling::None, "default",
- "default exception handling model"),
- clEnumValN(ExceptionHandling::DwarfCFI, "dwarf",
- "DWARF-like CFI based exception handling"),
- clEnumValN(ExceptionHandling::SjLj, "sjlj", "SjLj exception handling"),
- clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"),
- clEnumValN(ExceptionHandling::WinEH, "wineh",
- "Windows exception model"),
- clEnumValN(ExceptionHandling::Wasm, "wasm",
- "WebAssembly exception handling")));
-
-static cl::opt<TargetMachine::CodeGenFileType> FileType(
- "filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
- cl::desc(
- "Choose a file type (not all types are supported by all targets):"),
- cl::values(clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm",
- "Emit an assembly ('.s') file"),
- clEnumValN(TargetMachine::CGFT_ObjectFile, "obj",
- "Emit a native object ('.o') file"),
- clEnumValN(TargetMachine::CGFT_Null, "null",
- "Emit nothing, for performance testing")));
-
-static cl::opt<llvm::FramePointer::FP> FramePointerUsage(
- "frame-pointer", cl::desc("Specify frame pointer elimination optimization"),
- cl::init(llvm::FramePointer::None),
- cl::values(
- clEnumValN(llvm::FramePointer::All, "all",
- "Disable frame pointer elimination"),
- clEnumValN(llvm::FramePointer::NonLeaf, "non-leaf",
- "Disable frame pointer elimination for non-leaf frame"),
- clEnumValN(llvm::FramePointer::None, "none",
- "Enable frame pointer elimination")));
-
-static cl::opt<bool> EnableUnsafeFPMath(
- "enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::init(false));
-
-static cl::opt<bool> EnableNoInfsFPMath(
- "enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::init(false));
-
-static cl::opt<bool> EnableNoNaNsFPMath(
- "enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::init(false));
-
-static cl::opt<bool> EnableNoSignedZerosFPMath(
- "enable-no-signed-zeros-fp-math",
- cl::desc("Enable FP math optimizations that assume "
- "the sign of 0 is insignificant"),
- cl::init(false));
-
-static cl::opt<bool>
- EnableNoTrappingFPMath("enable-no-trapping-fp-math",
- cl::desc("Enable setting the FP exceptions build "
- "attribute not to use exceptions"),
- cl::init(false));
-
-static cl::opt<llvm::FPDenormal::DenormalMode> DenormalMode(
- "denormal-fp-math",
- cl::desc("Select which denormal numbers the code is permitted to require"),
- cl::init(FPDenormal::IEEE),
- cl::values(clEnumValN(FPDenormal::IEEE, "ieee",
- "IEEE 754 denormal numbers"),
- clEnumValN(FPDenormal::PreserveSign, "preserve-sign",
- "the sign of a flushed-to-zero number is preserved "
- "in the sign of 0"),
- clEnumValN(FPDenormal::PositiveZero, "positive-zero",
- "denormals are flushed to positive zero")));
-
-static cl::opt<bool> EnableHonorSignDependentRoundingFPMath(
- "enable-sign-dependent-rounding-fp-math", cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::init(false));
-
-static cl::opt<llvm::FloatABI::ABIType> FloatABIForCalls(
- "float-abi", cl::desc("Choose float ABI type"), cl::init(FloatABI::Default),
- cl::values(clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)")));
-
-static cl::opt<llvm::FPOpFusion::FPOpFusionMode> FuseFPOps(
- "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"),
- cl::init(FPOpFusion::Standard),
- cl::values(
- clEnumValN(FPOpFusion::Fast, "fast", "Fuse FP ops whenever profitable"),
- clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."),
- clEnumValN(FPOpFusion::Strict, "off",
- "Only fuse FP ops when the result won't be affected.")));
-
-static cl::opt<bool> DontPlaceZerosInBSS(
- "nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::init(false));
-
-static cl::opt<bool> EnableGuaranteedTailCallOpt(
- "tailcallopt",
- cl::desc(
- "Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::init(false));
-
-static cl::opt<bool> DisableTailCalls("disable-tail-calls",
- cl::desc("Never emit tail calls"),
- cl::init(false));
-
-static cl::opt<bool> StackSymbolOrdering("stack-symbol-ordering",
- cl::desc("Order local stack symbols."),
- cl::init(true));
-
-static cl::opt<unsigned>
- OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::init(0));
-
-static cl::opt<bool>
- StackRealign("stackrealign",
- cl::desc("Force align the stack to the minimum alignment"),
- cl::init(false));
-
-static cl::opt<std::string> TrapFuncName(
- "trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
-
-static cl::opt<bool> UseCtors("use-ctors",
- cl::desc("Use .ctors instead of .init_array."),
- cl::init(false));
-
-static cl::opt<bool> RelaxELFRelocations(
- "relax-elf-relocations",
- cl::desc("Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
- cl::init(false));
-
-static cl::opt<bool> DataSections("data-sections",
- cl::desc("Emit data into separate sections"),
- cl::init(false));
-
-static cl::opt<bool>
- FunctionSections("function-sections",
- cl::desc("Emit functions into separate sections"),
- cl::init(false));
-
-static cl::opt<bool> EmulatedTLS("emulated-tls",
- cl::desc("Use emulated TLS model"),
- cl::init(false));
-
-static cl::opt<bool>
- UniqueSectionNames("unique-section-names",
- cl::desc("Give unique names to every section"),
- cl::init(true));
-
-static cl::opt<llvm::EABI>
- EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"),
- cl::init(EABI::Default),
- cl::values(clEnumValN(EABI::Default, "default",
- "Triple default EABI version"),
- clEnumValN(EABI::EABI4, "4", "EABI version 4"),
- clEnumValN(EABI::EABI5, "5", "EABI version 5"),
- clEnumValN(EABI::GNU, "gnu", "EABI GNU")));
-
-static cl::opt<DebuggerKind> DebuggerTuningOpt(
- "debugger-tune", cl::desc("Tune debug info for a particular debugger"),
- cl::init(DebuggerKind::Default),
- cl::values(clEnumValN(DebuggerKind::GDB, "gdb", "gdb"),
- clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"),
- clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)")));
-
-static cl::opt<bool> EnableStackSizeSection(
- "stack-size-section",
- cl::desc("Emit a section containing stack size metadata"), cl::init(false));
-
-static cl::opt<bool>
- EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"),
- cl::init(false));
-
-static cl::opt<bool>
- EnableDebugEntryValues("debug-entry-values",
- cl::desc("Emit debug info about parameter's entry values"),
- cl::init(false));
-
-// Common utility function tightly tied to the options listed here. Initializes
-// a TargetOptions object with CodeGen flags and returns it.
-static TargetOptions InitTargetOptionsFromCodeGenFlags() {
- TargetOptions Options;
- Options.AllowFPOpFusion = FuseFPOps;
- Options.UnsafeFPMath = EnableUnsafeFPMath;
- Options.NoInfsFPMath = EnableNoInfsFPMath;
- Options.NoNaNsFPMath = EnableNoNaNsFPMath;
- Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath;
- Options.NoTrappingFPMath = EnableNoTrappingFPMath;
- Options.FPDenormalMode = DenormalMode;
- Options.HonorSignDependentRoundingFPMathOption =
- EnableHonorSignDependentRoundingFPMath;
- if (FloatABIForCalls != FloatABI::Default)
- Options.FloatABIType = FloatABIForCalls;
- Options.NoZerosInBSS = DontPlaceZerosInBSS;
- Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
- Options.StackAlignmentOverride = OverrideStackAlignment;
- Options.StackSymbolOrdering = StackSymbolOrdering;
- Options.UseInitArray = !UseCtors;
- Options.RelaxELFRelocations = RelaxELFRelocations;
- Options.DataSections = DataSections;
- Options.FunctionSections = FunctionSections;
- Options.UniqueSectionNames = UniqueSectionNames;
- Options.EmulatedTLS = EmulatedTLS;
- Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0;
- Options.ExceptionModel = ExceptionModel;
- Options.EmitStackSizeSection = EnableStackSizeSection;
- Options.EmitAddrsig = EnableAddrsig;
- Options.EnableDebugEntryValues = EnableDebugEntryValues;
-
- Options.MCOptions = InitMCTargetOptionsFromFlags();
-
- Options.ThreadModel = TMModel;
- Options.EABIVersion = EABIVersion;
- Options.DebuggerTuning = DebuggerTuningOpt;
-
- return Options;
-}
-
-LLVM_ATTRIBUTE_UNUSED static std::string getCPUStr() {
- // If user asked for the 'native' CPU, autodetect here. If autodection fails,
- // this will set the CPU to an empty string which tells the target to
- // pick a basic default.
- if (MCPU == "native")
- return sys::getHostCPUName();
-
- return MCPU;
-}
-
-LLVM_ATTRIBUTE_UNUSED static std::string getFeaturesStr() {
- SubtargetFeatures Features;
-
- // If user asked for the 'native' CPU, we need to autodetect features.
- // This is necessary for x86 where the CPU might not support all the
- // features the autodetected CPU name lists in the target. For example,
- // not all Sandybridge processors support AVX.
- if (MCPU == "native") {
- StringMap<bool> HostFeatures;
- if (sys::getHostCPUFeatures(HostFeatures))
- for (auto &F : HostFeatures)
- Features.AddFeature(F.first(), F.second);
- }
-
- for (unsigned i = 0; i != MAttrs.size(); ++i)
- Features.AddFeature(MAttrs[i]);
-
- return Features.getString();
-}
-
-LLVM_ATTRIBUTE_UNUSED static std::vector<std::string> getFeatureList() {
- SubtargetFeatures Features;
-
- // If user asked for the 'native' CPU, we need to autodetect features.
- // This is necessary for x86 where the CPU might not support all the
- // features the autodetected CPU name lists in the target. For example,
- // not all Sandybridge processors support AVX.
- if (MCPU == "native") {
- StringMap<bool> HostFeatures;
- if (sys::getHostCPUFeatures(HostFeatures))
- for (auto &F : HostFeatures)
- Features.AddFeature(F.first(), F.second);
- }
-
- for (unsigned i = 0; i != MAttrs.size(); ++i)
- Features.AddFeature(MAttrs[i]);
-
- return Features.getFeatures();
-}
-
-/// Set function attributes of functions in Module M based on CPU,
-/// Features, and command line flags.
-LLVM_ATTRIBUTE_UNUSED static void
-setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) {
- for (auto &F : M) {
- auto &Ctx = F.getContext();
- AttributeList Attrs = F.getAttributes();
- AttrBuilder NewAttrs;
-
- if (!CPU.empty())
- NewAttrs.addAttribute("target-cpu", CPU);
- if (!Features.empty())
- NewAttrs.addAttribute("target-features", Features);
- if (FramePointerUsage.getNumOccurrences() > 0) {
- if (FramePointerUsage == llvm::FramePointer::All)
- NewAttrs.addAttribute("frame-pointer", "all");
- else if (FramePointerUsage == llvm::FramePointer::NonLeaf)
- NewAttrs.addAttribute("frame-pointer", "non-leaf");
- else if (FramePointerUsage == llvm::FramePointer::None)
- NewAttrs.addAttribute("frame-pointer", "none");
- }
- if (DisableTailCalls.getNumOccurrences() > 0)
- NewAttrs.addAttribute("disable-tail-calls",
- toStringRef(DisableTailCalls));
- if (StackRealign)
- NewAttrs.addAttribute("stackrealign");
-
- if (TrapFuncName.getNumOccurrences() > 0)
- for (auto &B : F)
- for (auto &I : B)
- if (auto *Call = dyn_cast<CallInst>(&I))
- if (const auto *F = Call->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::debugtrap ||
- F->getIntrinsicID() == Intrinsic::trap)
- Call->addAttribute(
- llvm::AttributeList::FunctionIndex,
- Attribute::get(Ctx, "trap-func-name", TrapFuncName));
-
- // Let NewAttrs override Attrs.
- F.setAttributes(
- Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
- }
-}
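
For reference, an illustrative sketch (editorial, not part of the patch) of how llc-style drivers consumed the deleted helpers; it assumes the tool textually included this .inc file, which already pulls in the headers it needs:

    // Sketch of the pre-removal usage pattern in an llc-like tool.
    static void applyCodeGenFlags(llvm::Module &M) {
      llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
      std::string CPU = getCPUStr();     // resolves -mcpu=native via host probe
      std::string FS = getFeaturesStr(); // folds -mattr= values into one string
      setFunctionAttributes(CPU, FS, M); // stamps target-cpu/target-features
      (void)Options;                     // normally handed to TargetMachine creation
    }
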
diff --git a/linux-x64/clang/include/llvm/CodeGen/DFAPacketizer.h b/linux-x64/clang/include/llvm/CodeGen/DFAPacketizer.h
index cf58ee0..9cdaedc 100644
--- a/linux-x64/clang/include/llvm/CodeGen/DFAPacketizer.h
+++ b/linux-x64/clang/include/llvm/CodeGen/DFAPacketizer.h
@@ -28,6 +28,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/Support/Automaton.h"
#include <cstdint>
#include <map>
#include <memory>
@@ -45,64 +46,33 @@
class SUnit;
class TargetInstrInfo;
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
-// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
-//
-// e.g. terms x resource bit combinations that fit in uint32_t:
-// 4 terms x 8 bits = 32 bits
-// 3 terms x 10 bits = 30 bits
-// 2 terms x 16 bits = 32 bits
-//
-// e.g. terms x resource bit combinations that fit in uint64_t:
-// 8 terms x 8 bits = 64 bits
-// 7 terms x 9 bits = 63 bits
-// 6 terms x 10 bits = 60 bits
-// 5 terms x 12 bits = 60 bits
-// 4 terms x 16 bits = 64 bits <--- current
-// 3 terms x 21 bits = 63 bits
-// 2 terms x 32 bits = 64 bits
-//
-#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
-#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
-
-using DFAInput = uint64_t;
-using DFAStateInput = int64_t;
-
-#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
-// --------------------------------------------------------------------
-
class DFAPacketizer {
private:
- using UnsignPair = std::pair<unsigned, DFAInput>;
-
const InstrItineraryData *InstrItins;
- int CurrentState = 0;
- const DFAStateInput (*DFAStateInputTable)[2];
- const unsigned *DFAStateEntryTable;
-
- // CachedTable is a map from <FromState, Input> to ToState.
- DenseMap<UnsignPair, unsigned> CachedTable;
-
- // Read the DFA transition table and update CachedTable.
- void ReadTable(unsigned state);
+ Automaton<uint64_t> A;
+ /// For every itinerary, an "action" to apply to the automaton. This removes
+ /// the redundancy in actions between itinerary classes.
+ ArrayRef<unsigned> ItinActions;
public:
- DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2],
- const unsigned *SET);
+ DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a,
+ ArrayRef<unsigned> ItinActions)
+ : InstrItins(InstrItins), A(std::move(a)), ItinActions(ItinActions) {
+ // Start off with resource tracking disabled.
+ A.enableTranscription(false);
+ }
// Reset the current state to make all resources available.
void clearResources() {
- CurrentState = 0;
+ A.reset();
}
- // Return the DFAInput for an instruction class.
- DFAInput getInsnInput(unsigned InsnClass);
-
- // Return the DFAInput for an instruction class input vector.
- static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass);
+ // Set whether this packetizer should track not just whether instructions
+ // can be packetized, but also which functional units each instruction ends up
+ // using after packetization.
+ void setTrackResources(bool Track) {
+ A.enableTranscription(Track);
+ }
// Check if the resources occupied by a MCInstrDesc are available in
// the current state.
@@ -120,6 +90,15 @@
// current state to reflect that change.
void reserveResources(MachineInstr &MI);
+ // Return the resources used by the InstIdx'th instruction added to this
+ // packet. The resources are returned as a bitvector of functional units.
+ //
+ // Note that a bundle may be packed in multiple valid ways. This function
+ // returns one arbitrary valid packing.
+ //
+ // Requires setTrackResources(true) to have been called.
+ unsigned getUsedResources(unsigned InstIdx);
+
const InstrItineraryData *getInstrItins() const { return InstrItins; }
};
@@ -134,7 +113,7 @@
protected:
MachineFunction &MF;
const TargetInstrInfo *TII;
- AliasAnalysis *AA;
+ AAResults *AA;
// The VLIW Scheduler.
DefaultVLIWScheduler *VLIWScheduler;
@@ -146,9 +125,9 @@
std::map<MachineInstr*, SUnit*> MIToSUnit;
public:
- // The AliasAnalysis parameter can be nullptr.
+ // The AAResults parameter can be nullptr.
VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- AliasAnalysis *AA);
+ AAResults *AA);
virtual ~VLIWPacketizerList();
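
Illustrative usage of the reworked, Automaton-based packetizer (a sketch, not part of the patch; CreateTargetScheduleState and the canReserveResources overload are assumed from TargetInstrInfo and the elided part of this header):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/DFAPacketizer.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/CodeGen/TargetSubtargetInfo.h"
    #include <memory>

    // Sketch: pack candidate instructions, then recover which functional
    // units each packed instruction occupies.
    static void packetizeSketch(const llvm::TargetInstrInfo &TII,
                                const llvm::TargetSubtargetInfo &STI,
                                llvm::ArrayRef<llvm::MachineInstr *> Candidates) {
      std::unique_ptr<llvm::DFAPacketizer> P(TII.CreateTargetScheduleState(STI));
      P->clearResources();        // new packet: all resources available
      P->setTrackResources(true); // transcribe functional-unit usage too
      unsigned NumPacked = 0;
      for (llvm::MachineInstr *MI : Candidates) {
        if (!P->canReserveResources(*MI))
          break;                  // MI does not fit in the current packet
        P->reserveResources(*MI);
        ++NumPacked;
      }
      // One arbitrary valid functional-unit assignment per packed instruction.
      for (unsigned I = 0; I < NumPacked; ++I)
        (void)P->getUsedResources(I);
    }
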
diff --git a/linux-x64/clang/include/llvm/CodeGen/DIE.h b/linux-x64/clang/include/llvm/CodeGen/DIE.h
index 684f9e4..3efef6e 100644
--- a/linux-x64/clang/include/llvm/CodeGen/DIE.h
+++ b/linux-x64/clang/include/llvm/CodeGen/DIE.h
@@ -78,7 +78,7 @@
/// object.
class DIEAbbrev : public FoldingSetNode {
/// Unique number for node.
- unsigned Number;
+ unsigned Number = 0;
/// Dwarf tag code.
dwarf::Tag Tag;
@@ -190,7 +190,7 @@
uint64_t getValue() const { return Integer; }
void setValue(uint64_t Val) { Integer = Val; }
- void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -207,7 +207,7 @@
/// Get MCExpr.
const MCExpr *getValue() const { return Expr; }
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -224,7 +224,7 @@
/// Get MCSymbol.
const MCSymbol *getValue() const { return Label; }
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -242,11 +242,12 @@
: CU(TheCU), Index(Idx) {}
/// EmitValue - Emit base type reference.
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
/// SizeOf - Determine size of the base type reference in bytes.
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
+ uint64_t getIndex() const { return Index; }
};
//===--------------------------------------------------------------------===//
@@ -259,7 +260,7 @@
public:
DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) : LabelHi(Hi), LabelLo(Lo) {}
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -278,7 +279,7 @@
/// Grab the string out of the object.
StringRef getString() const { return S.getString(); }
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -300,7 +301,7 @@
/// Grab the string out of the object.
StringRef getString() const { return S; }
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -319,7 +320,7 @@
DIE &getEntry() const { return *Entry; }
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -338,7 +339,7 @@
/// Grab the current index out.
size_t getValue() const { return Index; }
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -382,12 +383,12 @@
static_assert(std::is_standard_layout<T>::value ||
std::is_pointer<T>::value,
"Expected standard layout or pointer");
- new (reinterpret_cast<void *>(Val.buffer)) T(V);
+ new (reinterpret_cast<void *>(&Val)) T(V);
}
- template <class T> T *get() { return reinterpret_cast<T *>(Val.buffer); }
+ template <class T> T *get() { return reinterpret_cast<T *>(&Val); }
template <class T> const T *get() const {
- return reinterpret_cast<const T *>(Val.buffer);
+ return reinterpret_cast<const T *>(&Val);
}
template <class T> void destruct() { get<T>()->~T(); }
@@ -485,7 +486,7 @@
#include "llvm/CodeGen/DIEValue.def"
/// Emit value via the Dwarf writer.
- void EmitValue(const AsmPrinter *AP) const;
+ void emitValue(const AsmPrinter *AP) const;
/// Return the size of a value in bytes.
unsigned SizeOf(const AsmPrinter *AP) const;
@@ -550,6 +551,25 @@
return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
}
+ void takeNodes(IntrusiveBackList<T> &Other) {
+ if (Other.empty())
+ return;
+
+ T *FirstNode = static_cast<T *>(Other.Last->Next.getPointer());
+ T *IterNode = FirstNode;
+ do {
+ // Keep a pointer to the node and increment the iterator.
+ T *TmpNode = IterNode;
+ IterNode = static_cast<T *>(IterNode->Next.getPointer());
+
+ // Unlink the node and push it back to this list.
+ TmpNode->Next.setPointerAndInt(TmpNode, true);
+ push_back(*TmpNode);
+ } while (IterNode != FirstNode);
+
+ Other.Last = nullptr;
+ }
+
class const_iterator;
class iterator
: public iterator_facade_base<iterator, std::forward_iterator_tag, T> {
@@ -570,7 +590,6 @@
T &operator*() const { return *static_cast<T *>(N); }
bool operator==(const iterator &X) const { return N == X.N; }
- bool operator!=(const iterator &X) const { return N != X.N; }
};
class const_iterator
@@ -593,7 +612,6 @@
const T &operator*() const { return *static_cast<const T *>(N); }
bool operator==(const const_iterator &X) const { return N == X.N; }
- bool operator!=(const const_iterator &X) const { return N != X.N; }
};
iterator begin() {
@@ -685,6 +703,10 @@
return addValue(Alloc, DIEValue(Attribute, Form, std::forward<T>(Value)));
}
+ /// Take ownership of the nodes in \p Other, and append them to the back of
+ /// the list.
+ void takeValues(DIEValueList &Other) { List.takeNodes(Other.List); }
+
value_range values() {
return make_range(value_iterator(List.begin()), value_iterator(List.end()));
}
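
The new splice helper in use (a minimal sketch, not part of the patch):

    // Moves every value node from Src onto the back of Dst without copying;
    // Src is left empty afterwards and Dst owns the spliced nodes.
    static void mergeValues(llvm::DIEValueList &Dst, llvm::DIEValueList &Src) {
      Dst.takeValues(Src);
    }
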
@@ -765,7 +787,7 @@
/// Get the absolute offset within the .debug_info or .debug_types section
/// for this DIE.
- unsigned getDebugSectionOffset() const;
+ uint64_t getDebugSectionOffset() const;
/// Compute the offset of this DIE and all its children.
///
@@ -841,14 +863,11 @@
/// a valid section depending on the client that is emitting DWARF.
MCSection *Section;
uint64_t Offset; /// .debug_info or .debug_types absolute section offset.
- uint32_t Length; /// The length in bytes of all of the DIEs in this unit.
- const uint16_t Version; /// The Dwarf version number for this unit.
- const uint8_t AddrSize; /// The size in bytes of an address for this unit.
protected:
virtual ~DIEUnit() = default;
public:
- DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag);
+ explicit DIEUnit(dwarf::Tag UnitTag);
DIEUnit(const DIEUnit &RHS) = delete;
DIEUnit(DIEUnit &&RHS) = delete;
void operator=(const DIEUnit &RHS) = delete;
@@ -870,19 +889,14 @@
///
/// \returns Section pointer which can be NULL.
MCSection *getSection() const { return Section; }
- void setDebugSectionOffset(unsigned O) { Offset = O; }
- unsigned getDebugSectionOffset() const { return Offset; }
- void setLength(uint64_t L) { Length = L; }
- uint64_t getLength() const { return Length; }
- uint16_t getDwarfVersion() const { return Version; }
- uint16_t getAddressSize() const { return AddrSize; }
+ void setDebugSectionOffset(uint64_t O) { Offset = O; }
+ uint64_t getDebugSectionOffset() const { return Offset; }
DIE &getUnitDie() { return Die; }
const DIE &getUnitDie() const { return Die; }
};
struct BasicDIEUnit final : DIEUnit {
- BasicDIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag)
- : DIEUnit(Version, AddrSize, UnitTag) {}
+ explicit BasicDIEUnit(dwarf::Tag UnitTag) : DIEUnit(UnitTag) {}
};
//===--------------------------------------------------------------------===//
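
Unit construction after this change, for comparison (a sketch, not part of the patch; DWARF version and address size now come from the emitting context rather than being stored on the unit):

    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/CodeGen/DIE.h"

    static void unitSketch() {
      // Before: BasicDIEUnit U(/*Version=*/4, /*AddrSize=*/8, UnitTag);
      llvm::BasicDIEUnit U(llvm::dwarf::DW_TAG_compile_unit);
      U.setDebugSectionOffset(0); // section offsets widened to uint64_t
      llvm::DIE &Root = U.getUnitDie();
      (void)Root;
    }
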
@@ -898,6 +912,9 @@
///
unsigned ComputeSize(const AsmPrinter *AP) const;
+ // TODO: move setSize() and Size to DIEValueList.
+ void setSize(unsigned size) { Size = size; }
+
/// BestForm - Choose the best form for data.
///
dwarf::Form BestForm(unsigned DwarfVersion) const {
@@ -913,7 +930,7 @@
return dwarf::DW_FORM_block;
}
- void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
@@ -932,6 +949,9 @@
///
unsigned ComputeSize(const AsmPrinter *AP) const;
+ // TODO: move setSize() and Size to DIEValueList.
+ void setSize(unsigned size) { Size = size; }
+
/// BestForm - Choose the best form for data.
///
dwarf::Form BestForm() const {
@@ -944,7 +964,7 @@
return dwarf::DW_FORM_block;
}
- void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
+ void emitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
diff --git a/linux-x64/clang/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/linux-x64/clang/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index 7eec75b..bca6065 100644
--- a/linux-x64/clang/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
+++ b/linux-x64/clang/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -12,16 +12,36 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include <utility>
namespace llvm {
class DILocalVariable;
+class DILocation;
+class DINode;
class MachineFunction;
class MachineInstr;
class TargetRegisterInfo;
+/// Record instruction ordering so we can query the relative positions of
+/// instructions within a function. Meta instructions are given the same
+/// ordinal as the preceding non-meta instruction. Class state is invalid if
+/// MF is modified after calling initialize.
+class InstructionOrdering {
+public:
+ void initialize(const MachineFunction &MF);
+ void clear() { InstNumberMap.clear(); }
+
+ /// Check if instruction \p A comes before \p B, where \p A and \p B both
+ /// belong to the MachineFunction passed to initialize().
+ bool isBefore(const MachineInstr *A, const MachineInstr *B) const;
+
+private:
+ /// Each instruction is assigned an order number.
+ DenseMap<const MachineInstr *, unsigned> InstNumberMap;
+};
+
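
A minimal sketch of the new query (not part of the patch; assumes MF is fully built and both instructions belong to it):

    static bool comesBefore(const llvm::MachineFunction &MF,
                            const llvm::MachineInstr *A,
                            const llvm::MachineInstr *B) {
      llvm::InstructionOrdering Ordering;
      Ordering.initialize(MF);        // assign each instruction an ordinal
      return Ordering.isBefore(A, B); // valid until MF is modified
    }
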
/// For each user variable, keep a list of instruction ranges where this
/// variable is accessible. The variables are listed in order of appearance.
class DbgValueHistoryMap {
@@ -51,6 +71,8 @@
/// register-described debug values that have their end index
/// set to this entry's position in the entry vector.
class Entry {
+ friend DbgValueHistoryMap;
+
public:
enum EntryKind { DbgValue, Clobber };
@@ -88,6 +110,9 @@
return Entries[Index];
}
+ /// Drop location ranges which exist entirely outside each variable's scope.
+ void trimLocationRanges(const MachineFunction &MF, LexicalScopes &LScopes,
+ const InstructionOrdering &Ordering);
bool empty() const { return VarEntries.empty(); }
void clear() { VarEntries.clear(); }
EntriesMap::const_iterator begin() const { return VarEntries.begin(); }
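
The debug handler then drives the new trimming with such an ordering, roughly (a sketch, not part of the patch; the populated history map and the LexicalScopes instance come from the surrounding handler code):

    static void trimSketch(const llvm::MachineFunction &MF,
                           llvm::LexicalScopes &LScopes,
                           llvm::DbgValueHistoryMap &Histories,
                           const llvm::InstructionOrdering &Ordering) {
      // Drops entries whose whole range lies outside the variable's scope.
      Histories.trimLocationRanges(MF, LScopes, Ordering);
    }
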
diff --git a/linux-x64/clang/include/llvm/CodeGen/DebugHandlerBase.h b/linux-x64/clang/include/llvm/CodeGen/DebugHandlerBase.h
index 4008d59..45823b2 100644
--- a/linux-x64/clang/include/llvm/CodeGen/DebugHandlerBase.h
+++ b/linux-x64/clang/include/llvm/CodeGen/DebugHandlerBase.h
@@ -18,8 +18,8 @@
#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
namespace llvm {
@@ -110,28 +110,37 @@
virtual void endFunctionImpl(const MachineFunction *MF) = 0;
virtual void skippedNonDebugFunction() {}
+private:
+ InstructionOrdering InstOrdering;
+
// AsmPrinterHandler overrides.
public:
+ void beginModule(Module *M) override;
+
void beginInstruction(const MachineInstr *MI) override;
void endInstruction() override;
void beginFunction(const MachineFunction *MF) override;
void endFunction(const MachineFunction *MF) override;
+ void beginBasicBlock(const MachineBasicBlock &MBB) override;
+ void endBasicBlock(const MachineBasicBlock &MBB) override;
+
/// Return Label preceding the instruction.
MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
/// Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- /// Return the function-local offset of an instruction. A label for the
- /// instruction \p MI should exist (\ref getLabelAfterInsn).
- const MCExpr *getFunctionLocalOffsetAfterInsn(const MachineInstr *MI);
-
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(const DIType *Ty);
+
+ /// Return true if type encoding is unsigned.
+ static bool isUnsignedDIType(const DIType *Ty);
+
+ const InstructionOrdering &getInstOrdering() const { return InstOrdering; }
};
-}
+} // namespace llvm
#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/DwarfStringPoolEntry.h b/linux-x64/clang/include/llvm/CodeGen/DwarfStringPoolEntry.h
index e189352..abeba62 100644
--- a/linux-x64/clang/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/linux-x64/clang/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -21,7 +21,7 @@
static constexpr unsigned NotIndexed = -1;
MCSymbol *Symbol;
- unsigned Offset;
+ uint64_t Offset;
unsigned Index;
bool isIndexed() const { return Index != NotIndexed; }
@@ -47,7 +47,7 @@
assert(getMapEntry()->second.Symbol && "No symbol available!");
return getMapEntry()->second.Symbol;
}
- unsigned getOffset() const { return getMapEntry()->second.Offset; }
+ uint64_t getOffset() const { return getMapEntry()->second.Offset; }
bool isIndexed() const { return MapEntryAndIndexed.getInt(); }
unsigned getIndex() const {
assert(isIndexed());
diff --git a/linux-x64/clang/include/llvm/CodeGen/EdgeBundles.h b/linux-x64/clang/include/llvm/CodeGen/EdgeBundles.h
index 28cdf54..b269560 100644
--- a/linux-x64/clang/include/llvm/CodeGen/EdgeBundles.h
+++ b/linux-x64/clang/include/llvm/CodeGen/EdgeBundles.h
@@ -17,7 +17,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
diff --git a/linux-x64/clang/include/llvm/CodeGen/ExecutionDomainFix.h b/linux-x64/clang/include/llvm/CodeGen/ExecutionDomainFix.h
index 6836678..c87d4f9 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ExecutionDomainFix.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ExecutionDomainFix.h
@@ -30,7 +30,6 @@
namespace llvm {
-class MachineBasicBlock;
class MachineInstr;
class TargetInstrInfo;
@@ -81,10 +80,20 @@
}
/// Mark domain as available.
- void addDomain(unsigned domain) { AvailableDomains |= 1u << domain; }
+ void addDomain(unsigned domain) {
+ assert(domain <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ AvailableDomains |= 1u << domain;
+ }
// Restrict to a single domain available.
- void setSingleDomain(unsigned domain) { AvailableDomains = 1u << domain; }
+ void setSingleDomain(unsigned domain) {
+ assert(domain <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ AvailableDomains = 1u << domain;
+ }
/// Return bitmask of domains that are available and in mask.
unsigned getCommonDomains(unsigned mask) const {
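
Worked example of the bit arithmetic these asserts now guard (a standalone sketch, not DomainValue itself):

    static void domainMaskExample() {
      unsigned Avail = (1u << 0) | (1u << 2); // domains 0 and 2 -> mask 0b101
      unsigned Common = Avail & 0x6u;         // getCommonDomains(0b110) -> 0b100
      Avail = 1u << 1;                        // setSingleDomain(1) -> mask 0b010
      (void)Common;
      (void)Avail;
      // A domain of 32 or more would shift past the width of unsigned,
      // which is undefined behavior; the new asserts reject exactly that.
    }
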
diff --git a/linux-x64/clang/include/llvm/CodeGen/FastISel.h b/linux-x64/clang/include/llvm/CodeGen/FastISel.h
index f09b59d..81c1d6a 100644
--- a/linux-x64/clang/include/llvm/CodeGen/FastISel.h
+++ b/linux-x64/clang/include/llvm/CodeGen/FastISel.h
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -86,16 +85,16 @@
const Value *Callee = nullptr;
MCSymbol *Symbol = nullptr;
ArgListTy Args;
- ImmutableCallSite *CS = nullptr;
+ const CallBase *CB = nullptr;
MachineInstr *Call = nullptr;
- unsigned ResultReg = 0;
+ Register ResultReg;
unsigned NumResultRegs = 0;
SmallVector<Value *, 16> OutVals;
SmallVector<ISD::ArgFlagsTy, 16> OutFlags;
- SmallVector<unsigned, 16> OutRegs;
+ SmallVector<Register, 16> OutRegs;
SmallVector<ISD::InputArg, 4> Ins;
- SmallVector<unsigned, 4> InRegs;
+ SmallVector<Register, 4> InRegs;
CallLoweringInfo()
: RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
@@ -103,14 +102,14 @@
CallLoweringInfo &setCallee(Type *ResultTy, FunctionType *FuncTy,
const Value *Target, ArgListTy &&ArgsList,
- ImmutableCallSite &Call) {
+ const CallBase &Call) {
RetTy = ResultTy;
Callee = Target;
IsInReg = Call.hasRetAttr(Attribute::InReg);
DoesNotReturn = Call.doesNotReturn();
IsVarArg = FuncTy->isVarArg();
- IsReturnValueUsed = !Call.getInstruction()->use_empty();
+ IsReturnValueUsed = !Call.use_empty();
RetSExt = Call.hasRetAttr(Attribute::SExt);
RetZExt = Call.hasRetAttr(Attribute::ZExt);
@@ -118,23 +117,23 @@
Args = std::move(ArgsList);
NumFixedArgs = FuncTy->getNumParams();
- CS = &Call;
+ CB = &Call;
return *this;
}
CallLoweringInfo &setCallee(Type *ResultTy, FunctionType *FuncTy,
MCSymbol *Target, ArgListTy &&ArgsList,
- ImmutableCallSite &Call,
+ const CallBase &Call,
unsigned FixedArgs = ~0U) {
RetTy = ResultTy;
- Callee = Call.getCalledValue();
+ Callee = Call.getCalledOperand();
Symbol = Target;
IsInReg = Call.hasRetAttr(Attribute::InReg);
DoesNotReturn = Call.doesNotReturn();
IsVarArg = FuncTy->isVarArg();
- IsReturnValueUsed = !Call.getInstruction()->use_empty();
+ IsReturnValueUsed = !Call.use_empty();
RetSExt = Call.hasRetAttr(Attribute::SExt);
RetZExt = Call.hasRetAttr(Attribute::ZExt);
@@ -142,7 +141,7 @@
Args = std::move(ArgsList);
NumFixedArgs = (FixedArgs == ~0U) ? FuncTy->getNumParams() : FixedArgs;
- CS = &Call;
+ CB = &Call;
return *this;
}
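
Callers migrate from ImmutableCallSite to handing over the CallBase directly, e.g. (a sketch, not part of the patch; a hypothetical helper, with Args prepared by the target as before):

    static void setUpCall(llvm::FastISel::CallLoweringInfo &CLI,
                          const llvm::CallInst &CI,
                          llvm::FastISel::ArgListTy &&Args) {
      // Previously: ImmutableCallSite CS(&CI); ... setCallee(..., CS);
      CLI.setCallee(CI.getType(), CI.getFunctionType(),
                    CI.getCalledOperand(), std::move(Args), CI);
    }
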
@@ -199,7 +198,7 @@
};
protected:
- DenseMap<const Value *, unsigned> LocalValueMap;
+ DenseMap<const Value *, Register> LocalValueMap;
FunctionLoweringInfo &FuncInfo;
MachineFunction *MF;
MachineRegisterInfo &MRI;
@@ -225,10 +224,6 @@
/// makes sense (for example, on function calls)
MachineInstr *EmitStartPt;
- /// Last local value flush point. On a subsequent flush, no local value will
- /// sink past this point.
- MachineBasicBlock::iterator LastFlushPoint;
-
public:
virtual ~FastISel();
@@ -247,7 +242,7 @@
/// be appended.
void startNewBlock();
- /// Flush the local value map and sink local values if possible.
+ /// Flush the local value map.
void finishBasicBlock();
/// Return current debug location information.
@@ -270,16 +265,16 @@
/// Create a virtual register and arrange for it to be assigned the
/// value for the given LLVM value.
- unsigned getRegForValue(const Value *V);
+ Register getRegForValue(const Value *V);
/// Look up the value to see if its value is already cached in a
/// register. It may be defined by instructions across blocks or defined
/// locally.
- unsigned lookUpRegForValue(const Value *V);
+ Register lookUpRegForValue(const Value *V);
/// This is a wrapper around getRegForValue that also takes care of
/// truncating or sign-extending the given getelementptr index value.
- std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
+ std::pair<Register, bool> getRegForGEPIndex(const Value *Idx);
/// We're checking to see if we can fold \p LI into \p FoldInst. Note
/// that we could have a sequence where multiple LLVM IR instructions are
@@ -314,10 +309,7 @@
void removeDeadCode(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E);
- struct SavePoint {
- MachineBasicBlock::iterator InsertPt;
- DebugLoc DL;
- };
+ using SavePoint = MachineBasicBlock::iterator;
/// Prepare InsertPt to begin inserting instructions into the local
/// value area and return the old insert position.
@@ -374,7 +366,7 @@
/// It first tries to emit an instruction with an immediate operand using
/// fastEmit_ri. If that fails, it materializes the immediate into a register
/// and try fastEmit_rr instead.
- unsigned fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill,
+ Register fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill,
uint64_t Imm, MVT ImmType);
/// This method is called by target-independent code to request that an
@@ -389,66 +381,66 @@
/// Emit a MachineInstr with no operands and a result register in the
/// given register class.
- unsigned fastEmitInst_(unsigned MachineInstOpcode,
+ Register fastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass *RC);
/// Emit a MachineInstr with one register operand and a result register
/// in the given register class.
- unsigned fastEmitInst_r(unsigned MachineInstOpcode,
+ Register fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill);
/// Emit a MachineInstr with two register operands and a result
/// register in the given register class.
- unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
+ Register fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1, bool Op1IsKill);
/// Emit a MachineInstr with three register operands and a result
/// register in the given register class.
- unsigned fastEmitInst_rrr(unsigned MachineInstOpcode,
+ Register fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1, bool Op1IsKill,
unsigned Op2, bool Op2IsKill);
/// Emit a MachineInstr with a register operand, an immediate, and a
/// result register in the given register class.
- unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
+ Register fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, uint64_t Imm);
/// Emit a MachineInstr with one register operand and two immediate
/// operands.
- unsigned fastEmitInst_rii(unsigned MachineInstOpcode,
+ Register fastEmitInst_rii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, uint64_t Imm1, uint64_t Imm2);
/// Emit a MachineInstr with a floating point immediate, and a result
/// register in the given register class.
- unsigned fastEmitInst_f(unsigned MachineInstOpcode,
+ Register fastEmitInst_f(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
const ConstantFP *FPImm);
/// Emit a MachineInstr with two register operands, an immediate, and a
/// result register in the given register class.
- unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
+ Register fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1, bool Op1IsKill,
uint64_t Imm);
/// Emit a MachineInstr with a single immediate operand, and a result
/// register in the given register class.
- unsigned fastEmitInst_i(unsigned MachineInstOpcode,
+ Register fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, uint64_t Imm);
/// Emit a MachineInstr for an extract_subreg from a specified index of
/// a superregister to a specified type.
- unsigned fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ Register fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill,
uint32_t Idx);
/// Emit MachineInstrs to compute the value of Op with all but the
/// least significant bit set to zero.
- unsigned fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill);
+ Register fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill);
/// Emit an unconditional branch to the given block, unless it is the
/// immediate (fall-through) successor, and update the CFG.
@@ -466,14 +458,14 @@
/// NOTE: This is only necessary because we might select a block that uses a
/// value before we select the block that defines the value. It might be
/// possible to fix this by selecting blocks in reverse postorder.
- void updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs = 1);
+ void updateValueMap(const Value *I, Register Reg, unsigned NumRegs = 1);
- unsigned createResultReg(const TargetRegisterClass *RC);
+ Register createResultReg(const TargetRegisterClass *RC);
/// Try to constrain Op so that it is usable by argument OpNum of the
/// provided MCInstrDesc. If this fails, create a new virtual register in the
/// correct class and COPY the value there.
- unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+ Register constrainOperandRegClass(const MCInstrDesc &II, Register Op,
unsigned OpNum);
/// Emit a constant in a register using target-specific logic, such as
@@ -511,18 +503,6 @@
unsigned NumArgs);
bool lowerCallTo(CallLoweringInfo &CLI);
- bool isCommutativeIntrinsic(IntrinsicInst const *II) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- return true;
- default:
- return false;
- }
- }
-
bool lowerCall(const CallInst *I);
/// Select and emit code for a binary operator instruction, which has
/// an opcode which directly corresponds to the given ISD opcode.
@@ -534,12 +514,17 @@
bool selectCall(const User *I);
bool selectIntrinsicCall(const IntrinsicInst *II);
bool selectBitCast(const User *I);
+ bool selectFreeze(const User *I);
bool selectCast(const User *I, unsigned Opcode);
bool selectExtractValue(const User *U);
- bool selectInsertValue(const User *I);
bool selectXRayCustomEvent(const CallInst *II);
bool selectXRayTypedEvent(const CallInst *II);
+ bool shouldOptForSize(const MachineFunction *MF) const {
+ // TODO: Implement PGSO.
+ return MF->getFunction().hasOptSize();
+ }
+
private:
/// Handle PHI nodes in successor blocks.
///
@@ -552,12 +537,12 @@
/// Helper for materializeRegForValue to materialize a constant in a
/// target-independent way.
- unsigned materializeConstant(const Value *V, MVT VT);
+ Register materializeConstant(const Value *V, MVT VT);
/// Helper for getRegForValue. This function is called when the value
/// isn't already available in a register and must be materialized with new
/// instructions.
- unsigned materializeRegForValue(const Value *V, MVT VT);
+ Register materializeRegForValue(const Value *V, MVT VT);
/// Clears LocalValueMap and moves the area for the new local variables
/// to the beginning of the block. It helps to avoid spilling cached variables
@@ -567,20 +552,6 @@
/// Removes dead local value instructions after SavedLastLocalvalue.
void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue);
- struct InstOrderMap {
- DenseMap<MachineInstr *, unsigned> Orders;
- MachineInstr *FirstTerminator = nullptr;
- unsigned FirstTerminatorOrder = std::numeric_limits<unsigned>::max();
-
- void initialize(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator LastFlushPoint);
- };
-
- /// Sinks the local value materialization instruction LocalMI to its first use
- /// in the basic block, or deletes it if it is not used.
- void sinkLocalValueMaterialization(MachineInstr &LocalMI, unsigned DefReg,
- InstOrderMap &OrderMap);
-
/// Insertion point before trying to select the current instruction.
MachineBasicBlock::iterator SavedInsertPt;
diff --git a/linux-x64/clang/include/llvm/CodeGen/FaultMaps.h b/linux-x64/clang/include/llvm/CodeGen/FaultMaps.h
index a1e2349..da56c4d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/FaultMaps.h
+++ b/linux-x64/clang/include/llvm/CodeGen/FaultMaps.h
@@ -36,7 +36,8 @@
static const char *faultTypeToString(FaultKind);
- void recordFaultingOp(FaultKind FaultTy, const MCSymbol *HandlerLabel);
+ void recordFaultingOp(FaultKind FaultTy, const MCSymbol *FaultingLabel,
+ const MCSymbol *HandlerLabel);
void serializeToFaultMapSection();
void reset() {
FunctionInfos.clear();
diff --git a/linux-x64/clang/include/llvm/CodeGen/FunctionLoweringInfo.h b/linux-x64/clang/include/llvm/CodeGen/FunctionLoweringInfo.h
index fb60191..b6bde02 100644
--- a/linux-x64/clang/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -13,14 +13,13 @@
#ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
#define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
-#include "llvm/ADT/APInt.h"
+
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -37,6 +36,7 @@
class Argument;
class BasicBlock;
class BranchProbabilityInfo;
+class LegacyDivergenceAnalysis;
class Function;
class Instruction;
class MachineFunction;
@@ -67,7 +67,7 @@
/// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
/// allocated to hold a pointer to the hidden sret parameter.
- unsigned DemoteRegister;
+ Register DemoteRegister;
/// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
@@ -75,52 +75,49 @@
/// ValueMap - Since we emit code for the function a basic block at a time,
/// we must remember which virtual registers hold the values for
/// cross-basic-block values.
- DenseMap<const Value *, unsigned> ValueMap;
+ DenseMap<const Value *, Register> ValueMap;
/// The VirtReg2Value map is needed by the Divergence Analysis driven
/// instruction selection. It is the inverse of ValueMap, computed lazily
/// (on demand), and is used to get the Value corresponding to a live-in
/// virtual register from TargetLoweringInfo::isSDNodeSourceOfDivergence.
- DenseMap<unsigned, const Value*> VirtReg2Value;
+ DenseMap<Register, const Value*> VirtReg2Value;
/// This method is called from TargetLoweringInfo::isSDNodeSourceOfDivergence
/// to get the Value corresponding to the live-in virtual register.
- const Value * getValueFromVirtualReg(unsigned Vreg);
+ const Value *getValueFromVirtualReg(Register Vreg);
/// Track virtual registers created for exception pointers.
- DenseMap<const Value *, unsigned> CatchPadExceptionPointers;
+ DenseMap<const Value *, Register> CatchPadExceptionPointers;
- /// Keep track of frame indices allocated for statepoints as they could be
- /// used across basic block boundaries. This struct is more complex than a
- /// simple map because the stateopint lowering code de-duplicates gc pointers
- /// based on their SDValue (so %p and (bitcast %p to T) will get the same
- /// slot), and we track that here.
-
- struct StatepointSpillMap {
- using SlotMapTy = DenseMap<const Value *, Optional<int>>;
-
- /// Maps uniqued llvm IR values to the slots they were spilled in. If a
- /// value is mapped to None it means we visited the value but didn't spill
- /// it (because it was a constant, for instance).
- SlotMapTy SlotMap;
-
- /// Maps llvm IR values to the values they were de-duplicated to.
- DenseMap<const Value *, const Value *> DuplicateMap;
-
- SlotMapTy::const_iterator find(const Value *V) const {
- auto DuplIt = DuplicateMap.find(V);
- if (DuplIt != DuplicateMap.end())
- V = DuplIt->second;
- return SlotMap.find(V);
- }
-
- SlotMapTy::const_iterator end() const { return SlotMap.end(); }
+ /// Helper object to track which of three possible relocation mechanisms is
+ /// used for a particular value being relocated over a statepoint.
+ struct StatepointRelocationRecord {
+ enum RelocType {
+ // Value did not need to be relocated and can be used directly.
+ NoRelocate,
+ // Value was spilled to the stack and needs to be filled at the gc.relocate.
+ Spill,
+ // Value was lowered to a tied def and the gc.relocate should be replaced
+ // with a copy from the vreg.
+ VReg,
+ } type = NoRelocate;
+ // Payload contains either the frame index of the stack slot in which the
+ // value was spilled, or the virtual register which contains the
+ // re-definition.
+ union payload_t {
+ payload_t() : FI(-1) {}
+ int FI;
+ Register Reg;
+ } payload;
};
- /// Maps gc.statepoint instructions to their corresponding StatepointSpillMap
- /// instances.
- DenseMap<const Instruction *, StatepointSpillMap> StatepointSpillMaps;
+ /// Keep track of each value which was relocated and the strategy used to
+ /// relocate that value. This information is required when visiting
+ /// gc.relocates which may appear in following blocks.
+ using StatepointSpillMapTy =
+ DenseMap<const Value *, StatepointRelocationRecord>;
+ DenseMap<const Instruction *, StatepointSpillMapTy> StatepointRelocationMaps;
/// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
/// the entry block. This allows the allocas to be efficiently referenced
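
The relocation record added above is consumed when visiting gc.relocates, roughly (a sketch, not part of the patch; reloadFromSlot and copyFromVReg are hypothetical callbacks):

    using Record = llvm::FunctionLoweringInfo::StatepointRelocationRecord;
    static void reloadFromSlot(int FI) { (void)FI; }            // hypothetical
    static void copyFromVReg(llvm::Register Reg) { (void)Reg; } // hypothetical

    static void applyRecord(const Record &R) {
      switch (R.type) {
      case Record::NoRelocate:
        break;                        // use the original value directly
      case Record::Spill:
        reloadFromSlot(R.payload.FI); // refill from the spill slot
        break;
      case Record::VReg:
        copyFromVReg(R.payload.Reg);  // copy from the tied-def vreg
        break;
      }
    }
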
@@ -139,9 +136,9 @@
BitVector DescribedArgs;
/// RegFixups - Registers which need to be replaced after isel is done.
- DenseMap<unsigned, unsigned> RegFixups;
+ DenseMap<Register, Register> RegFixups;
- DenseSet<unsigned> RegsWithFixups;
+ DenseSet<Register> RegsWithFixups;
/// StatepointStackSlots - A list of temporary stack slots (frame indices)
/// used to spill values at a statepoint. We store them here to enable
@@ -195,21 +192,21 @@
/// isExportedInst - Return true if the specified value is an instruction
/// exported from its block.
- bool isExportedInst(const Value *V) {
+ bool isExportedInst(const Value *V) const {
return ValueMap.count(V);
}
- unsigned CreateReg(MVT VT, bool isDivergent = false);
+ Register CreateReg(MVT VT, bool isDivergent = false);
- unsigned CreateRegs(const Value *V);
+ Register CreateRegs(const Value *V);
- unsigned CreateRegs(Type *Ty, bool isDivergent = false);
+ Register CreateRegs(Type *Ty, bool isDivergent = false);
- unsigned InitializeRegForValue(const Value *V) {
+ Register InitializeRegForValue(const Value *V) {
// Tokens never live in vregs.
if (V->getType()->isTokenTy())
return 0;
- unsigned &R = ValueMap[V];
+ Register &R = ValueMap[V];
assert(R == 0 && "Already initialized this value register!");
assert(VirtReg2Value.empty());
return R = CreateRegs(V);
@@ -217,7 +214,7 @@
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
/// register is a PHI destination and the PHI's LiveOutInfo is not valid.
- const LiveOutInfo *GetLiveOutRegInfo(unsigned Reg) {
+ const LiveOutInfo *GetLiveOutRegInfo(Register Reg) {
if (!LiveOutRegInfo.inBounds(Reg))
return nullptr;
@@ -233,10 +230,10 @@
/// the register's LiveOutInfo is for a smaller bit width, it is extended to
/// the larger bit width by zero extension. The bit width must be no smaller
/// than the LiveOutInfo's existing bit width.
- const LiveOutInfo *GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth);
+ const LiveOutInfo *GetLiveOutRegInfo(Register Reg, unsigned BitWidth);
/// AddLiveOutRegInfo - Adds LiveOutInfo for a register.
- void AddLiveOutRegInfo(unsigned Reg, unsigned NumSignBits,
+ void AddLiveOutRegInfo(Register Reg, unsigned NumSignBits,
const KnownBits &Known) {
// Only install this information if it tells us something.
if (NumSignBits == 1 && Known.isUnknown())
@@ -257,11 +254,11 @@
/// called when a block is visited before all of its predecessors.
void InvalidatePHILiveOutRegInfo(const PHINode *PN) {
// PHIs with no uses have no ValueMap entry.
- DenseMap<const Value*, unsigned>::const_iterator It = ValueMap.find(PN);
+ DenseMap<const Value*, Register>::const_iterator It = ValueMap.find(PN);
if (It == ValueMap.end())
return;
- unsigned Reg = It->second;
+ Register Reg = It->second;
if (Reg == 0)
return;
@@ -276,12 +273,10 @@
/// getArgumentFrameIndex - Get frame index for the byval argument.
int getArgumentFrameIndex(const Argument *A);
- unsigned getCatchPadExceptionPointerVReg(const Value *CPI,
+ Register getCatchPadExceptionPointerVReg(const Value *CPI,
const TargetRegisterClass *RC);
private:
- void addSEHHandlersForLPads(ArrayRef<const LandingPadInst *> LPads);
-
/// LiveOutRegInfo - Information about live out vregs.
IndexedMap<LiveOutInfo, VirtReg2IndexFunctor> LiveOutRegInfo;
};
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CSEInfo.h
index 5a44e67..f76dec5 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CSEInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -16,14 +16,12 @@
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CodeGen.h"
namespace llvm {
+class MachineBasicBlock;
/// A class that wraps MachineInstrs and derives from FoldingSetNode in order to
/// be uniqued in a CSEMap. The tradeoff here is extra memory allocations for
@@ -120,6 +118,8 @@
void setMF(MachineFunction &MF);
+ Error verify();
+
/// Records a newly created inst in a list and lazily insert it to the CSEMap.
/// Sometimes, this method might be called with a partially constructed
/// MachineInstr,
@@ -173,14 +173,16 @@
: ID(ID), MRI(MRI) {}
// Profiling methods.
const GISelInstProfileBuilder &addNodeIDOpcode(unsigned Opc) const;
- const GISelInstProfileBuilder &addNodeIDRegType(const LLT &Ty) const;
- const GISelInstProfileBuilder &addNodeIDRegType(const unsigned) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const LLT Ty) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const Register) const;
const GISelInstProfileBuilder &
addNodeIDRegType(const TargetRegisterClass *RC) const;
const GISelInstProfileBuilder &addNodeIDRegType(const RegisterBank *RB) const;
- const GISelInstProfileBuilder &addNodeIDRegNum(unsigned Reg) const;
+ const GISelInstProfileBuilder &addNodeIDRegNum(Register Reg) const;
+
+ const GISelInstProfileBuilder &addNodeIDReg(Register Reg) const;
const GISelInstProfileBuilder &addNodeIDImmediate(int64_t Imm) const;
const GISelInstProfileBuilder &
@@ -220,9 +222,7 @@
public:
static char ID;
- GISelCSEAnalysisWrapperPass() : MachineFunctionPass(ID) {
- initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ GISelCSEAnalysisWrapperPass();
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CallLowering.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CallLowering.h
index d8d15bd..57ff390 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -17,9 +17,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetCallingConv.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cstdint>
@@ -27,14 +29,14 @@
namespace llvm {
+class CallBase;
class DataLayout;
class Function;
+class FunctionLoweringInfo;
class MachineIRBuilder;
-class MachineOperand;
struct MachinePointerInfo;
class MachineRegisterInfo;
class TargetLowering;
-class Type;
class Value;
class CallLowering {
@@ -42,38 +44,104 @@
virtual void anchor();
public:
- struct ArgInfo {
- SmallVector<Register, 4> Regs;
+ struct BaseArgInfo {
Type *Ty;
- ISD::ArgFlagsTy Flags;
+ SmallVector<ISD::ArgFlagsTy, 4> Flags;
bool IsFixed;
+ BaseArgInfo(Type *Ty,
+ ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
+ bool IsFixed = true)
+ : Ty(Ty), Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) {}
+
+ BaseArgInfo() : Ty(nullptr), IsFixed(false) {}
+ };
+
+ struct ArgInfo : public BaseArgInfo {
+ SmallVector<Register, 4> Regs;
+ // If the argument had to be split into multiple parts according to the
+ // target calling convention, then this contains the original vregs
+ // if the argument was an incoming arg.
+ SmallVector<Register, 2> OrigRegs;
+
ArgInfo(ArrayRef<Register> Regs, Type *Ty,
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true)
- : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags),
- IsFixed(IsFixed) {
+ ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
+ bool IsFixed = true)
+ : BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()) {
+ if (!Regs.empty() && Flags.empty())
+ this->Flags.push_back(ISD::ArgFlagsTy());
// FIXME: We should have just one way of saying "no register".
- assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) &&
+ assert(((Ty->isVoidTy() || Ty->isEmptyTy()) ==
+ (Regs.empty() || Regs[0] == 0)) &&
"only void types should have no register");
}
+
+ ArgInfo() : BaseArgInfo() {}
+ };
+
+ struct CallLoweringInfo {
+ /// Calling convention to be used for the call.
+ CallingConv::ID CallConv = CallingConv::C;
+
+ /// Destination of the call. It should be either a register, globaladdress,
+ /// or externalsymbol.
+ MachineOperand Callee = MachineOperand::CreateImm(0);
+
+ /// Descriptor for the return type of the function.
+ ArgInfo OrigRet;
+
+ /// List of descriptors of the arguments passed to the function.
+ SmallVector<ArgInfo, 8> OrigArgs;
+
+ /// Valid if the call has a swifterror inout parameter, and contains the
+ /// vreg that the swifterror should be copied into after the call.
+ Register SwiftErrorVReg;
+
+ MDNode *KnownCallees = nullptr;
+
+ /// True if the call must be tail call optimized.
+ bool IsMustTailCall = false;
+
+ /// True if the call passes all target-independent checks for tail call
+ /// optimization.
+ bool IsTailCall = false;
+
+ /// True if the call was lowered as a tail call. This is consumed by the
+ /// legalizer. This allows the legalizer to lower libcalls as tail calls.
+ bool LoweredTailCall = false;
+
+ /// True if the call is to a vararg function.
+ bool IsVarArg = false;
+
+ /// True if the function's return value can be lowered to registers.
+ bool CanLowerReturn = true;
+
+ /// VReg to hold the hidden sret parameter.
+ Register DemoteRegister;
+
+ /// The stack index for sret demotion.
+ int DemoteStackIndex;
};
/// Argument handling is mostly uniform between the four places that
/// make these decisions: function formal arguments, call
/// instruction args, call instruction returns and function
/// returns. However, once a decision has been made on where an
- /// arugment should go, exactly what happens can vary slightly. This
+ /// argument should go, exactly what happens can vary slightly. This
/// class abstracts the differences.
struct ValueHandler {
- ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- CCAssignFn *AssignFn)
- : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn) {}
+ ValueHandler(bool IsIncoming, MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
+ : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn),
+ IsIncomingArgumentHandler(IsIncoming) {}
virtual ~ValueHandler() = default;
- /// Returns true if the handler is dealing with formal arguments,
- /// not with return values etc.
- virtual bool isArgumentHandler() const { return false; }
+ /// Returns true if the handler is dealing with incoming arguments,
+ /// i.e. those that move values from some physical location to vregs.
+ bool isIncomingArgumentHandler() const {
+ return IsIncomingArgumentHandler;
+ }
/// Materialize a VReg containing the address of the specified
/// stack-based object. This is either based on a FrameIndex or
@@ -96,6 +164,15 @@
uint64_t Size, MachinePointerInfo &MPO,
CCValAssign &VA) = 0;
+ /// An overload which takes an ArgInfo if additional information about
+ /// the arg is needed.
+ virtual void assignValueToAddress(const ArgInfo &Arg, Register Addr,
+ uint64_t Size, MachinePointerInfo &MPO,
+ CCValAssign &VA) {
+ assert(Arg.Regs.size() == 1);
+ assignValueToAddress(Arg.Regs[0], Addr, Size, MPO, VA);
+ }
+
/// Handle custom values, which may be passed into one or more of \p VAs.
/// \return The number of \p VAs that have been assigned after the first
/// one, and which should therefore be skipped from further
@@ -107,12 +184,15 @@
llvm_unreachable("Custom values not supported");
}
- Register extendRegister(Register ValReg, CCValAssign &VA);
+    /// Extend a register to the location type given in \p VA, capped at
+    /// extending to at most \p MaxSizeBits bits. If \p MaxSizeBits is 0 then
+    /// no maximum is set.
+ Register extendRegister(Register ValReg, CCValAssign &VA,
+ unsigned MaxSizeBits = 0);
virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, const ArgInfo &Info,
- CCState &State) {
- return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ ISD::ArgFlagsTy Flags, CCState &State) {
+ return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
}
MachineIRBuilder &MIRBuilder;
@@ -120,9 +200,22 @@
CCAssignFn *AssignFn;
private:
+ bool IsIncomingArgumentHandler;
virtual void anchor();
};
+ struct IncomingValueHandler : public ValueHandler {
+ IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(true, MIRBuilder, MRI, AssignFn) {}
+ };
+
+ struct OutgoingValueHandler : public ValueHandler {
+ OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(false, MIRBuilder, MRI, AssignFn) {}
+ };
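+
+  // Illustrative sketch: targets typically derive their concrete handlers
+  // from these helpers (XYZIncomingValueHandler is a hypothetical name):
+  //
+  //   struct XYZIncomingValueHandler : CallLowering::IncomingValueHandler {
+  //     XYZIncomingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+  //                             CCAssignFn *Fn)
+  //         : IncomingValueHandler(B, MRI, Fn) {}
+  //     // ...overrides of the pure virtual hooks, e.g. assignValueToAddress.
+  //   };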
+
protected:
/// Getter for generic TargetLowering class.
const TargetLowering *getTLI() const {
@@ -135,6 +228,17 @@
return static_cast<const XXXTargetLowering *>(TLI);
}
+ /// \returns Flags corresponding to the attributes on the \p ArgIdx-th
+ /// parameter of \p Call.
+ ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call,
+ unsigned ArgIdx) const;
+
+ /// Adds flags to \p Flags based off of the attributes in \p Attrs.
+ /// \p OpIdx is the index in \p Attrs to add flags from.
+ void addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
+ const AttributeList &Attrs,
+ unsigned OpIdx) const;
+
template <typename FuncInfoTy>
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL,
const FuncInfoTy &FuncInfo) const;
@@ -158,11 +262,44 @@
MachineIRBuilder &MIRBuilder) const;
/// Invoke Handler::assignArg on each of the given \p Args and then use
- /// \p Callback to move them to the assigned locations.
+ /// \p Handler to move them to the assigned locations.
///
/// \return True if everything has succeeded, false otherwise.
- bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
+ bool handleAssignments(MachineIRBuilder &MIRBuilder,
+ SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const;
+ bool handleAssignments(CCState &CCState,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ MachineIRBuilder &MIRBuilder,
+ SmallVectorImpl<ArgInfo> &Args,
+ ValueHandler &Handler) const;
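+
+  // Illustrative usage sketch (XYZIncomingValueHandler, SplitArgs and
+  // AssignFn are assumed names from the surrounding target code):
+  //
+  //   XYZIncomingValueHandler Handler(MIRBuilder, MRI, AssignFn);
+  //   if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+  //     return false;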
+
+ /// Analyze passed or returned values from a call, supplied in \p ArgInfo,
+ /// incorporating info about the passed values into \p CCState.
+ ///
+ /// Used to check if arguments are suitable for tail call lowering.
+ bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
+ CCAssignFn &AssignFnFixed,
+ CCAssignFn &AssignFnVarArg) const;
+
+ /// \returns True if the calling convention for a callee and its caller pass
+ /// results in the same way. Typically used for tail call eligibility checks.
+ ///
+ /// \p Info is the CallLoweringInfo for the call.
+ /// \p MF is the MachineFunction for the caller.
+ /// \p InArgs contains the results of the call.
+ /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for
+ /// fixed arguments.
+ /// \p CalleeAssignFnVarArg is similar, but for varargs.
+ /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for
+ /// fixed arguments.
+ /// \p CallerAssignFnVarArg is similar, but for varargs.
+ bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &InArgs,
+ CCAssignFn &CalleeAssignFnFixed,
+ CCAssignFn &CalleeAssignFnVarArg,
+ CCAssignFn &CallerAssignFnFixed,
+ CCAssignFn &CallerAssignFnVarArg) const;
public:
CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
@@ -175,20 +312,73 @@
return false;
}
+ /// Load the returned value from the stack into virtual registers in \p VRegs.
+ /// It uses the frame index \p FI and the start offset from \p DemoteReg.
+ /// The loaded data size will be determined from \p RetTy.
+ void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg,
+ int FI) const;
+
+  /// Store the return value given by \p VRegs into the stack, starting at the
+  /// offset specified in \p DemoteReg.
+ void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg) const;
+
+ /// Insert the hidden sret ArgInfo to the beginning of \p SplitArgs.
+ /// This function should be called from the target specific
+ /// lowerFormalArguments when \p F requires the sret demotion.
+ void insertSRetIncomingArgument(const Function &F,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ Register &DemoteReg, MachineRegisterInfo &MRI,
+ const DataLayout &DL) const;
+
+ /// For the call-base described by \p CB, insert the hidden sret ArgInfo to
+ /// the OrigArgs field of \p Info.
+ void insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
+ const CallBase &CB,
+ CallLoweringInfo &Info) const;
+
+ /// \return True if the return type described by \p Outs can be returned
+ /// without performing sret demotion.
+ bool checkReturn(CCState &CCInfo, SmallVectorImpl<BaseArgInfo> &Outs,
+ CCAssignFn *Fn) const;
+
+ /// Get the type and the ArgFlags for the split components of \p RetTy as
+ /// returned by \c ComputeValueVTs.
+ void getReturnInfo(CallingConv::ID CallConv, Type *RetTy, AttributeList Attrs,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ const DataLayout &DL) const;
+
+ /// Toplevel function to check the return type based on the target calling
+ /// convention. \return True if the return value of \p MF can be returned
+ /// without performing sret demotion.
+ bool checkReturnTypeForCallConv(MachineFunction &MF) const;
+
+ /// This hook must be implemented to check whether the return values
+ /// described by \p Outs can fit into the return registers. If false
+ /// is returned, an sret-demotion is performed.
+ virtual bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ bool IsVarArg) const {
+ return true;
+ }
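+
+  // Illustrative flow (a sketch, not the definitive sequence): a target's
+  // lowerFormalArguments can support sret demotion roughly as follows, where
+  // SplitArgs, DemoteReg, MRI and DL are assumed locals:
+  //
+  //   if (!checkReturnTypeForCallConv(MF))
+  //     insertSRetIncomingArgument(F, SplitArgs, DemoteReg, MRI, DL);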
+
/// This hook must be implemented to lower outgoing return values, described
/// by \p Val, into the specified virtual registers \p VRegs.
/// This hook is used by GlobalISel.
///
+ /// \p FLI is required for sret demotion.
+ ///
/// \p SwiftErrorVReg is non-zero if the function has a swifterror parameter
/// that needs to be implicitly returned.
///
/// \return True if the lowering succeeds, false otherwise.
virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
Register SwiftErrorVReg) const {
if (!supportSwiftError()) {
assert(SwiftErrorVReg == 0 && "attempt to use unsupported swifterror");
- return lowerReturn(MIRBuilder, Val, VRegs);
+ return lowerReturn(MIRBuilder, Val, VRegs, FLI);
}
return false;
}
@@ -196,10 +386,13 @@
/// This hook behaves as the extended lowerReturn function, but for targets
/// that do not support swifterror value promotion.
virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs) const {
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
return false;
}
+ virtual bool fallBackToDAGISel(const Function &F) const { return false; }
+
/// This hook must be implemented to lower the incoming (formal)
/// arguments, described by \p VRegs, for GlobalISel. Each argument
/// must end up in the related virtual registers described by \p VRegs.
@@ -207,49 +400,23 @@
/// the second in \c VRegs[1], and so on. For each argument, there will be one
/// register for each non-aggregate type, as returned by \c computeValueLLTs.
/// \p MIRBuilder is set to the proper insertion for the argument
- /// lowering.
+ /// lowering. \p FLI is required for sret demotion.
///
/// \return True if the lowering succeeded, false otherwise.
virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
return false;
}
/// This hook must be implemented to lower the given call instruction,
/// including argument and return value marshalling.
///
- /// \p CallConv is the calling convention to be used for the call.
- ///
- /// \p Callee is the destination of the call. It should be either a register,
- /// globaladdress, or externalsymbol.
- ///
- /// \p OrigRet is a descriptor for the return type of the function.
- ///
- /// \p OrigArgs is a list of descriptors of the arguments passed to the
- /// function.
- ///
- /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
- /// parameter, and contains the vreg that the swifterror should be copied into
- /// after the call.
///
/// \return true if the lowering succeeded, false otherwise.
- virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs,
- Register SwiftErrorVReg) const {
- if (!supportSwiftError()) {
- assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror");
- return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs);
- }
- return false;
- }
-
- /// This hook behaves as the extended lowerCall function, but for targets that
- /// do not support swifterror value promotion.
- virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const {
+ virtual bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
return false;
}
@@ -277,7 +444,7 @@
/// range of an immediate jump.
///
/// \return true if the lowering succeeded, false otherwise.
- bool lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
+ bool lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &Call,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg,
std::function<unsigned()> GetCalleeReg) const;
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Combiner.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Combiner.h
index 12a1f97..efe8bdf 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -1,4 +1,4 @@
-//== ----- llvm/CodeGen/GlobalISel/Combiner.h --------------------- == //
+//== ----- llvm/CodeGen/GlobalISel/Combiner.h -------------------*- C++ -*-== //
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 0c50c9c..0d240e9 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -17,16 +17,24 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
#define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
+#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/Support/Alignment.h"
namespace llvm {
class GISelChangeObserver;
class MachineIRBuilder;
+class MachineInstrBuilder;
class MachineRegisterInfo;
class MachineInstr;
class MachineOperand;
+class GISelKnownBits;
+class MachineDominatorTree;
+class LegalizerInfo;
+struct LegalityQuery;
+class TargetLowering;
struct PreferredTuple {
LLT Ty; // The result type of the extend.
@@ -34,13 +42,73 @@
MachineInstr *MI;
};
+struct IndexedLoadStoreMatchInfo {
+ Register Addr;
+ Register Base;
+ Register Offset;
+ bool IsPre;
+};
+
+struct PtrAddChain {
+ int64_t Imm;
+ Register Base;
+};
+
+struct RegisterImmPair {
+ Register Reg;
+ int64_t Imm;
+};
+
+struct ShiftOfShiftedLogic {
+ MachineInstr *Logic;
+ MachineInstr *Shift2;
+ Register LogicNonShiftReg;
+ uint64_t ValSum;
+};
+
+using OperandBuildSteps =
+ SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
+struct InstructionBuildSteps {
+  unsigned Opcode = 0;          ///< The opcode for the produced instruction.
+  OperandBuildSteps OperandFns; ///< Operands to be added to the instruction.
+ InstructionBuildSteps() = default;
+ InstructionBuildSteps(unsigned Opcode, const OperandBuildSteps &OperandFns)
+ : Opcode(Opcode), OperandFns(OperandFns) {}
+};
+
+struct InstructionStepsMatchInfo {
+ /// Describes instructions to be built during a combine.
+ SmallVector<InstructionBuildSteps, 2> InstrsToBuild;
+ InstructionStepsMatchInfo() = default;
+ InstructionStepsMatchInfo(
+ std::initializer_list<InstructionBuildSteps> InstrsToBuild)
+ : InstrsToBuild(InstrsToBuild) {}
+};
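+
+// A minimal sketch (not part of the upstream header): a combine can describe
+// its replacement instruction via build steps; DstReg, SrcReg and MaskReg are
+// assumed registers taken from the match:
+//
+//   InstructionStepsMatchInfo MatchInfo{
+//       {TargetOpcode::G_AND,
+//        {[=](MachineInstrBuilder &MIB) { MIB.addDef(DstReg); },
+//         [=](MachineInstrBuilder &MIB) { MIB.addUse(SrcReg); },
+//         [=](MachineInstrBuilder &MIB) { MIB.addUse(MaskReg); }}}};
+//
+// CombinerHelper::applyBuildInstructionSteps then materializes it.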
+
class CombinerHelper {
+protected:
MachineIRBuilder &Builder;
MachineRegisterInfo &MRI;
GISelChangeObserver &Observer;
+ GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
+ const LegalizerInfo *LI;
public:
- CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B);
+ CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
+ GISelKnownBits *KB = nullptr,
+ MachineDominatorTree *MDT = nullptr,
+ const LegalizerInfo *LI = nullptr);
+
+ GISelKnownBits *getKnownBits() const {
+ return KB;
+ }
+
+ const TargetLowering &getTargetLowering() const;
+
+ /// \return true if the combine is running prior to legalization, or if \p
+ /// Query is legal on the target.
+ bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const;
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
@@ -56,18 +124,381 @@
bool matchCombineCopy(MachineInstr &MI);
void applyCombineCopy(MachineInstr &MI);
+ /// Returns true if \p DefMI precedes \p UseMI or they are the same
+ /// instruction. Both must be in the same basic block.
+ bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI);
+
+ /// Returns true if \p DefMI dominates \p UseMI. By definition an
+ /// instruction dominates itself.
+ ///
+ /// If we haven't been provided with a MachineDominatorTree during
+ /// construction, this function returns a conservative result that tracks just
+ /// a single basic block.
+ bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI);
+
/// If \p MI is extend that consumes the result of a load, try to combine it.
/// Returns true if MI changed.
bool tryCombineExtendingLoads(MachineInstr &MI);
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
- bool matchCombineBr(MachineInstr &MI);
- bool tryCombineBr(MachineInstr &MI);
+  /// Combine \p MI into a pre-indexed or post-indexed load/store operation if
+  /// legal and the surrounding code makes it useful.
+  ///
+  /// For example (pre-indexed):
+  ///
+  /// $addr = G_PTR_ADD $base, $offset
+  /// [...]
+  /// $val = G_LOAD $addr
+  /// [...]
+  /// $whatever = COPY $addr
+  ///
+  /// -->
+  ///
+  /// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre)
+  /// [...]
+  /// $whatever = COPY $addr
+  ///
+  /// or (post-indexed):
+  ///
+  /// G_STORE $val, $base
+  /// [...]
+  /// $addr = G_PTR_ADD $base, $offset
+  /// [...]
+  /// $whatever = COPY $addr
+  ///
+  /// -->
+  ///
+  /// $addr = G_INDEXED_STORE $val, $base, $offset
+  /// [...]
+  /// $whatever = COPY $addr
+ bool tryCombineIndexedLoadStore(MachineInstr &MI);
+ bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
+ void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
+
+ bool matchSextTruncSextLoad(MachineInstr &MI);
+ bool applySextTruncSextLoad(MachineInstr &MI);
+
+ /// Match sext_inreg(load p), imm -> sextload p
+ bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+ bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+
+ /// If a brcond's true block is not the fallthrough, make it so by inverting
+ /// the condition and swapping operands.
+ bool matchOptBrCondByInvertingCond(MachineInstr &MI);
+ void applyOptBrCondByInvertingCond(MachineInstr &MI);
+
+ /// If \p MI is G_CONCAT_VECTORS, try to combine it.
+ /// Returns true if MI changed.
+ /// Right now, we support:
+ /// - concat_vector(undef, undef) => undef
+ /// - concat_vector(build_vector(A, B), build_vector(C, D)) =>
+ /// build_vector(A, B, C, D)
+ ///
+ /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+ bool tryCombineConcatVectors(MachineInstr &MI);
+ /// Check if the G_CONCAT_VECTORS \p MI is undef or if it
+ /// can be flattened into a build_vector.
+ /// In the first case \p IsUndef will be true.
+ /// In the second case \p Ops will contain the operands needed
+ /// to produce the flattened build_vector.
+ ///
+ /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+ bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+ SmallVectorImpl<Register> &Ops);
+ /// Replace \p MI with a flattened build_vector with \p Ops or an
+ /// implicit_def if IsUndef is true.
+ void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef,
+ const ArrayRef<Register> Ops);
+
+ /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
+ /// Returns true if MI changed.
+ ///
+ /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
+ bool tryCombineShuffleVector(MachineInstr &MI);
+ /// Check if the G_SHUFFLE_VECTOR \p MI can be replaced by a
+ /// concat_vectors.
+ /// \p Ops will contain the operands needed to produce the flattened
+ /// concat_vectors.
+ ///
+ /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
+ bool matchCombineShuffleVector(MachineInstr &MI,
+ SmallVectorImpl<Register> &Ops);
+ /// Replace \p MI with a concat_vectors with \p Ops.
+ void applyCombineShuffleVector(MachineInstr &MI,
+ const ArrayRef<Register> Ops);
+
+  /// Optimize memcpy intrinsics et al., e.g. constant length calls.
+  /// \p MaxLen, if non-zero, specifies the maximum length of a mem libcall to
+  /// inline.
+ bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
+
+ bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
+ bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
+
+ /// Fold (shift (shift base, x), y) -> (shift base (x+y))
+ bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo);
+ bool applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo);
+
+ /// If we have a shift-by-constant of a bitwise logic op that itself has a
+ /// shift-by-constant operand with identical opcode, we may be able to convert
+ /// that into 2 independent shifts followed by the logic op.
+ bool matchShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo);
+ bool applyShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo);
+
+ /// Transform a multiply by a power-of-2 value to a left shift.
+ bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
+ bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
+
+ // Transform a G_SHL with an extended source into a narrower shift if
+ // possible.
+ bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
+ bool applyCombineShlOfExtend(MachineInstr &MI,
+ const RegisterImmPair &MatchData);
+
+ /// Reduce a shift by a constant to an unmerge and a shift on a half sized
+ /// type. This will not produce a shift smaller than \p TargetShiftSize.
+ bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize,
+ unsigned &ShiftVal);
+ bool applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal);
+ bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount);
+
+ /// Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
+ bool
+ matchCombineUnmergeMergeToPlainValues(MachineInstr &MI,
+ SmallVectorImpl<Register> &Operands);
+ bool
+ applyCombineUnmergeMergeToPlainValues(MachineInstr &MI,
+ SmallVectorImpl<Register> &Operands);
+
+ /// Transform G_UNMERGE Constant -> Constant1, Constant2, ...
+ bool matchCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts);
+ bool applyCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts);
+
+ /// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
+ bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
+ bool applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
+
+ /// Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0
+ bool matchCombineUnmergeZExtToZExt(MachineInstr &MI);
+ bool applyCombineUnmergeZExtToZExt(MachineInstr &MI);
+
+ /// Transform fp_instr(cst) to constant result of the fp operation.
+ bool matchCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst);
+ bool applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst);
+
+ /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
+ bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
+ bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);
+
+ /// Transform PtrToInt(IntToPtr(x)) to x.
+ bool matchCombineP2IToI2P(MachineInstr &MI, Register &Reg);
+ bool applyCombineP2IToI2P(MachineInstr &MI, Register &Reg);
+
+ /// Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y)
+ /// Transform G_ADD y, (G_PTRTOINT x) -> G_PTRTOINT (G_PTR_ADD x, y)
+ bool matchCombineAddP2IToPtrAdd(MachineInstr &MI,
+ std::pair<Register, bool> &PtrRegAndCommute);
+ bool applyCombineAddP2IToPtrAdd(MachineInstr &MI,
+ std::pair<Register, bool> &PtrRegAndCommute);
+
+ // Transform G_PTR_ADD (G_PTRTOINT C1), C2 -> C1 + C2
+ bool matchCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst);
+ bool applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst);
+
+ /// Transform anyext(trunc(x)) to x.
+ bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
+ bool applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
+
+ /// Transform [asz]ext([asz]ext(x)) to [asz]ext x.
+ bool matchCombineExtOfExt(MachineInstr &MI,
+ std::tuple<Register, unsigned> &MatchInfo);
+ bool applyCombineExtOfExt(MachineInstr &MI,
+ std::tuple<Register, unsigned> &MatchInfo);
+
+ /// Transform fneg(fneg(x)) to x.
+ bool matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg);
+
+ /// Match fabs(fabs(x)) to fabs(x).
+ bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
+ bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
+
+ /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
+ bool matchCombineTruncOfExt(MachineInstr &MI,
+ std::pair<Register, unsigned> &MatchInfo);
+ bool applyCombineTruncOfExt(MachineInstr &MI,
+ std::pair<Register, unsigned> &MatchInfo);
+
+ /// Transform trunc (shl x, K) to shl (trunc x),
+ /// K => K < VT.getScalarSizeInBits().
+ bool matchCombineTruncOfShl(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+ bool applyCombineTruncOfShl(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+
+ /// Transform G_MUL(x, -1) to G_SUB(0, x)
+ bool applyCombineMulByNegativeOne(MachineInstr &MI);
+
+ /// Return true if any explicit use operand on \p MI is defined by a
+ /// G_IMPLICIT_DEF.
+ bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
+
+ /// Return true if all register explicit use operands on \p MI are defined by
+ /// a G_IMPLICIT_DEF.
+ bool matchAllExplicitUsesAreUndef(MachineInstr &MI);
+
+ /// Return true if a G_SHUFFLE_VECTOR instruction \p MI has an undef mask.
+ bool matchUndefShuffleVectorMask(MachineInstr &MI);
+
+ /// Return true if a G_STORE instruction \p MI is storing an undef value.
+ bool matchUndefStore(MachineInstr &MI);
+
+ /// Return true if a G_SELECT instruction \p MI has an undef comparison.
+ bool matchUndefSelectCmp(MachineInstr &MI);
+
+ /// Return true if a G_SELECT instruction \p MI has a constant comparison. If
+ /// true, \p OpIdx will store the operand index of the known selected value.
+ bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx);
+
+ /// Replace an instruction with a G_FCONSTANT with value \p C.
+ bool replaceInstWithFConstant(MachineInstr &MI, double C);
+
+ /// Replace an instruction with a G_CONSTANT with value \p C.
+ bool replaceInstWithConstant(MachineInstr &MI, int64_t C);
+
+ /// Replace an instruction with a G_IMPLICIT_DEF.
+ bool replaceInstWithUndef(MachineInstr &MI);
+
+ /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
+ bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
+
+ /// Delete \p MI and replace all of its uses with \p Replacement.
+ bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
+
+  /// Return true if \p MOP1 and \p MOP2 are register operands that are
+  /// defined by equivalent instructions.
+ bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
+
+ /// Return true if \p MOP is defined by a G_CONSTANT with a value equal to
+ /// \p C.
+ bool matchConstantOp(const MachineOperand &MOP, int64_t C);
+
+ /// Optimize (cond ? x : x) -> x
+ bool matchSelectSameVal(MachineInstr &MI);
+
+ /// Optimize (x op x) -> x
+ bool matchBinOpSameVal(MachineInstr &MI);
+
+ /// Check if operand \p OpIdx is zero.
+ bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx);
+
+ /// Check if operand \p OpIdx is undef.
+ bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx);
+
+ /// Check if operand \p OpIdx is known to be a power of 2.
+ bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx);
+
+  /// Erase \p MI.
+ bool eraseInst(MachineInstr &MI);
+
+ /// Return true if MI is a G_ADD which can be simplified to a G_SUB.
+ bool matchSimplifyAddToSub(MachineInstr &MI,
+ std::tuple<Register, Register> &MatchInfo);
+ bool applySimplifyAddToSub(MachineInstr &MI,
+ std::tuple<Register, Register> &MatchInfo);
+
+ /// Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
+ bool
+ matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI,
+ InstructionStepsMatchInfo &MatchInfo);
+
+ /// Replace \p MI with a series of instructions described in \p MatchInfo.
+ bool applyBuildInstructionSteps(MachineInstr &MI,
+ InstructionStepsMatchInfo &MatchInfo);
+
+ /// Match ashr (shl x, C), C -> sext_inreg (C)
+ bool matchAshrShlToSextInreg(MachineInstr &MI,
+ std::tuple<Register, int64_t> &MatchInfo);
+ bool applyAshShlToSextInreg(MachineInstr &MI,
+ std::tuple<Register, int64_t> &MatchInfo);
+ /// \return true if \p MI is a G_AND instruction whose operands are x and y
+ /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
+ ///
+ /// \param [in] MI - The G_AND instruction.
+ /// \param [out] Replacement - A register the G_AND should be replaced with on
+ /// success.
+ bool matchRedundantAnd(MachineInstr &MI, Register &Replacement);
+
+ /// \return true if \p MI is a G_OR instruction whose operands are x and y
+ /// where x | y == x or x | y == y. (E.g., one of operands is all-zeros
+ /// value.)
+ ///
+ /// \param [in] MI - The G_OR instruction.
+ /// \param [out] Replacement - A register the G_OR should be replaced with on
+ /// success.
+ bool matchRedundantOr(MachineInstr &MI, Register &Replacement);
+
+ /// \return true if \p MI is a G_SEXT_INREG that can be erased.
+ bool matchRedundantSExtInReg(MachineInstr &MI);
+
+ /// Combine inverting a result of a compare into the opposite cond code.
+ bool matchNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
+ bool applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
+
+ /// Fold (xor (and x, y), y) -> (and (not x), y)
+ ///{
+ bool matchXorOfAndWithSameReg(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+ bool applyXorOfAndWithSameReg(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+ ///}
+
+ /// Combine G_PTR_ADD with nullptr to G_INTTOPTR
+ bool matchPtrAddZero(MachineInstr &MI);
+ bool applyPtrAddZero(MachineInstr &MI);
+
+ /// Combine G_UREM x, (known power of 2) to an add and bitmasking.
+ bool applySimplifyURemByPow2(MachineInstr &MI);
+
+ bool matchCombineInsertVecElts(MachineInstr &MI,
+ SmallVectorImpl<Register> &MatchInfo);
+
+ bool applyCombineInsertVecElts(MachineInstr &MI,
+ SmallVectorImpl<Register> &MatchInfo);
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
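+
+  // Illustrative sketch of the match/apply pairing used by combiners (the
+  // surrounding Observer, B, KB, MDT and LInfo are assumed to exist):
+  //
+  //   CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
+  //   PtrAddChain MatchInfo;
+  //   if (Helper.matchPtrAddImmedChain(MI, MatchInfo))
+  //     return Helper.applyPtrAddImmedChain(MI, MatchInfo);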
+
+private:
+ // Memcpy family optimization helpers.
+ bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ unsigned KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile);
+ bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
+ unsigned KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile);
+ bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
+ unsigned KnownLen, Align DstAlign, bool IsVolatile);
+
+ /// Given a non-indexed load or store instruction \p MI, find an offset that
+ /// can be usefully and legally folded into it as a post-indexing operation.
+ ///
+ /// \returns true if a candidate is found.
+ bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+ Register &Offset);
+
+ /// Given a non-indexed load or store instruction \p MI, find an offset that
+ /// can be usefully and legally folded into it as a pre-indexing operation.
+ ///
+ /// \returns true if a candidate is found.
+ bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+ Register &Offset);
};
} // namespace llvm
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 3b09a8e..e95a5e2 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -27,9 +27,11 @@
class CombinerInfo {
public:
CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal,
- LegalizerInfo *LInfo)
+ const LegalizerInfo *LInfo, bool OptEnabled, bool OptSize,
+ bool MinSize)
: IllegalOpsAllowed(AllowIllegalOps),
- LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) {
+ LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo),
+ EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(MinSize) {
assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) &&
"Expecting legalizerInfo when illegalops not allowed");
}
@@ -43,6 +45,15 @@
bool LegalizeIllegalOps; // TODO: Make use of this.
const LegalizerInfo *LInfo;
+  /// Whether optimizations should be enabled. This distinguishes between uses
+  /// of the combiner that run unconditionally and those that run only when
+  /// optimizations are specifically enabled.
+ bool EnableOpt;
+ /// Whether we're optimizing for size.
+ bool EnableOptSize;
+ /// Whether we're optimizing for minsize (-Oz).
+ bool EnableMinSize;
+
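+  // Illustrative sketch: a derived CombinerInfo usually forwards these flags
+  // from the target machine and function attributes (MyCombinerInfo is a
+  // hypothetical subclass; it would also override the combine() hook below):
+  //
+  //   struct MyCombinerInfo : CombinerInfo {
+  //     MyCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize)
+  //         : CombinerInfo(/*AllowIllegalOps*/ true,
+  //                        /*ShouldLegalizeIllegal*/ false,
+  //                        /*LInfo*/ nullptr, EnableOpt, OptSize, MinSize) {}
+  //   };
+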
/// Attempt to combine instructions using MI as the root.
///
/// Use Observer to report the creation, modification, and erasure of
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
index e817d9b..df196bf 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
@@ -54,6 +54,17 @@
return buildConstant(Dst, MaybeCst->getSExtValue());
break;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(DstOps.size() == 1 && "Invalid dst ops");
+ assert(SrcOps.size() == 2 && "Invalid src ops");
+ const DstOp &Dst = DstOps[0];
+ const SrcOp &Src0 = SrcOps[0];
+ const SrcOp &Src1 = SrcOps[1];
+ if (auto MaybeCst =
+ ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI()))
+ return buildConstant(Dst, MaybeCst->getSExtValue());
+ break;
+ }
}
return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps);
}
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
index e5691cb..dd7f04a 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -51,7 +51,7 @@
/// For convenience, finishedChangingAllUsesOfReg() will report the completion
/// of the changes. The use list may change between this call and
/// finishedChangingAllUsesOfReg().
- void changingAllUsesOfReg(const MachineRegisterInfo &MRI, unsigned Reg);
+ void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg);
/// All instructions reported as changing by changingAllUsesOfReg() have
/// finished being changed.
void finishedChangingAllUsesOfReg();
@@ -101,7 +101,7 @@
void MF_HandleRemoval(MachineInstr &MI) override { erasingInstr(MI); }
};
-/// A simple RAII based CSEInfo installer.
+/// A simple RAII based Delegate installer.
/// Use this in a scope to install a delegate to the MachineFunction and reset
/// it at the end of the scope.
class RAIIDelegateInstaller {
@@ -113,5 +113,27 @@
~RAIIDelegateInstaller();
};
+/// A simple RAII based Observer installer.
+/// Use this in a scope to install the Observer to the MachineFunction and reset
+/// it at the end of the scope.
+class RAIIMFObserverInstaller {
+ MachineFunction &MF;
+
+public:
+ RAIIMFObserverInstaller(MachineFunction &MF, GISelChangeObserver &Observer);
+ ~RAIIMFObserverInstaller();
+};
+
+/// Class to install both of the above.
+class RAIIMFObsDelInstaller {
+ RAIIDelegateInstaller DelI;
+ RAIIMFObserverInstaller ObsI;
+
+public:
+ RAIIMFObsDelInstaller(MachineFunction &MF, GISelObserverWrapper &Wrapper)
+ : DelI(MF, &Wrapper), ObsI(MF, Wrapper) {}
+ ~RAIIMFObsDelInstaller() = default;
+};
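+
+// Illustrative usage sketch (MyObserver is an assumed GISelChangeObserver):
+//
+//   {
+//     GISelObserverWrapper WrapperObserver;
+//     WrapperObserver.addObserver(&MyObserver);
+//     RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
+//     // ...mutate MF; WrapperObserver is notified of all changes...
+//   } // Delegate and observer are uninstalled here.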
+
} // namespace llvm
#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
new file mode 100644
index 0000000..eafed37
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -0,0 +1,131 @@
+//===- llvm/CodeGen/GlobalISel/GISelKnownBits.h ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Provides analysis for querying information about KnownBits during GISel
+/// passes.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
+#define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/KnownBits.h"
+
+namespace llvm {
+
+class TargetLowering;
+class DataLayout;
+
+class GISelKnownBits : public GISelChangeObserver {
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ const TargetLowering &TL;
+ const DataLayout &DL;
+ unsigned MaxDepth;
+ /// Cache maintained during a computeKnownBits request.
+ SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache;
+
+ void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
+ unsigned computeNumSignBitsMin(Register Src0, Register Src1,
+ const APInt &DemandedElts, unsigned Depth = 0);
+
+public:
+ GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6);
+ virtual ~GISelKnownBits() = default;
+
+ const MachineFunction &getMachineFunction() const {
+ return MF;
+ }
+
+ const DataLayout &getDataLayout() const {
+ return DL;
+ }
+
+ virtual void computeKnownBitsImpl(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
+ unsigned computeNumSignBits(Register R, const APInt &DemandedElts,
+ unsigned Depth = 0);
+ unsigned computeNumSignBits(Register R, unsigned Depth = 0);
+
+  // KnownBits API
+ KnownBits getKnownBits(Register R);
+ KnownBits getKnownBits(Register R, const APInt &DemandedElts,
+ unsigned Depth = 0);
+
+ // Calls getKnownBits for first operand def of MI.
+ KnownBits getKnownBits(MachineInstr &MI);
+ APInt getKnownZeroes(Register R);
+ APInt getKnownOnes(Register R);
+
+  /// \return true if 'Val & Mask' is known to be zero, i.e. every bit set in
+  /// \p Mask is known to be zero in \p Val. We use this predicate to simplify
+  /// operations downstream.
+ bool maskedValueIsZero(Register Val, const APInt &Mask) {
+ return Mask.isSubsetOf(getKnownBits(Val).Zero);
+ }
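+
+  // Illustrative sketch: callers can use this to prove low bits are clear
+  // (Reg is an assumed 64-bit register):
+  //
+  //   if (KB->maskedValueIsZero(Reg, APInt::getLowBitsSet(64, 2))) {
+  //     // The low two bits of Reg are provably zero.
+  //   }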
+
+ /// \return true if the sign bit of Op is known to be zero. We use this
+ /// predicate to simplify operations downstream.
+ bool signBitIsZero(Register Op);
+
+ static void computeKnownBitsForAlignment(KnownBits &Known,
+ Align Alignment) {
+ // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2(Alignment));
+ }
+
+ /// \return The known alignment for the pointer-like value \p R.
+ Align computeKnownAlignment(Register R, unsigned Depth = 0);
+
+ // Observer API. No-op for non-caching implementation.
+  void erasingInstr(MachineInstr &MI) override {}
+  void createdInstr(MachineInstr &MI) override {}
+  void changingInstr(MachineInstr &MI) override {}
+  void changedInstr(MachineInstr &MI) override {}
+
+protected:
+ unsigned getMaxDepth() const { return MaxDepth; }
+};
+
+/// To use the KnownBits analysis in a pass:
+///   GISelKnownBits &Info = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+/// If the implementation is caching, add it to the observer wrapper:
+///   WrapperObserver.addObserver(&Info);
+///
+/// Other features such as caching and serializing/deserializing to MIR may
+/// eventually be added. Such implementations can derive from GISelKnownBits
+/// and override computeKnownBitsImpl.
+class GISelKnownBitsAnalysis : public MachineFunctionPass {
+ std::unique_ptr<GISelKnownBits> Info;
+
+public:
+ static char ID;
+ GISelKnownBitsAnalysis() : MachineFunctionPass(ID) {
+ initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+ GISelKnownBits &get(MachineFunction &MF) {
+ if (!Info)
+ Info = std::make_unique<GISelKnownBits>(MF);
+ return *Info.get();
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void releaseMemory() override { Info.reset(); }
+};
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index b0bb519..9e7ade3 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -11,9 +11,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Support/Debug.h"
namespace llvm {
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 8654ba8..8eab8a5 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -20,13 +20,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Types.h"
-#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CodeGen.h"
#include <memory>
#include <utility>
@@ -37,8 +38,8 @@
class CallInst;
class CallLowering;
class Constant;
+class ConstrainedFPIntrinsic;
class DataLayout;
-class FunctionLoweringInfo;
class Instruction;
class MachineBasicBlock;
class MachineFunction;
@@ -202,6 +203,10 @@
/// \return true if the materialization succeeded.
bool translate(const Constant &C, Register Reg);
+  /// Translate \p U as a copy of \p V.
+ bool translateCopy(const User &U, const Value &V,
+ MachineIRBuilder &MIRBuilder);
+
/// Translate an LLVM bitcast into generic IR. Either a COPY or a G_BITCAST is
/// emitted.
bool translateBitCast(const User &U, MachineIRBuilder &MIRBuilder);
@@ -213,13 +218,15 @@
bool translateStore(const User &U, MachineIRBuilder &MIRBuilder);
/// Translate an LLVM string intrinsic (memcpy, memset, ...).
- bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
- unsigned ID);
+ bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
+ unsigned Opcode);
void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder);
+ bool translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+ MachineIRBuilder &MIRBuilder);
/// Helper function for translateSimpleIntrinsic.
/// \return The generic opcode for \p IntrinsicID if \p IntrinsicID is a
@@ -232,10 +239,13 @@
bool translateSimpleIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder);
+ bool translateConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI,
+ MachineIRBuilder &MIRBuilder);
+
bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder);
- bool translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder);
+ bool translateInlineAsm(const CallBase &CB, MachineIRBuilder &MIRBuilder);
/// Returns true if the value should be split into multiple LLTs.
/// If \p Offsets is given then the split type's offsets will be stored in it.
@@ -243,10 +253,26 @@
bool valueIsSplit(const Value &V,
SmallVectorImpl<uint64_t> *Offsets = nullptr);
+ /// Common code for translating normal calls or invokes.
+ bool translateCallBase(const CallBase &CB, MachineIRBuilder &MIRBuilder);
+
/// Translate call instruction.
/// \pre \p U is a call instruction.
bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
+ /// When an invoke or a cleanupret unwinds to the next EH pad, there are
+ /// many places it could ultimately go. In the IR, we have a single unwind
+ /// destination, but in the machine CFG, we enumerate all the possible blocks.
+ /// This function skips over imaginary basic blocks that hold catchswitch
+ /// instructions, and finds all the "real" machine
+ /// basic block destinations. As those destinations may not be successors of
+ /// EHPadBB, here we also calculate the edge probability to those
+ /// destinations. The passed-in Prob is the edge probability to EHPadBB.
+ bool findUnwindDestinations(
+ const BasicBlock *EHPadBB, BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests);
+
bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
@@ -278,11 +304,37 @@
/// MachineBasicBlocks for the function have been created.
void finishPendingPhis();
+ /// Translate \p Inst into a unary operation \p Opcode.
+ /// \pre \p U is a unary operation.
+ bool translateUnaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder);
+
/// Translate \p Inst into a binary operation \p Opcode.
/// \pre \p U is a binary operation.
bool translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder);
+ /// If the set of cases should be emitted as a series of branches, return
+ /// true. If we should emit this as a bunch of and/or'd together conditions,
+ /// return false.
+ bool shouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
+ /// Helper method for findMergedConditions.
+ /// This function emits a branch and is used at the leaves of an OR or an
+ /// AND operator tree.
+ void emitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
+ /// Used during condbr translation to find trees of conditions that can be
+ /// optimized.
+ void findMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
+
/// Translate branch (br) instruction.
/// \pre \p U is a branch instruction.
bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
@@ -296,19 +348,23 @@
void emitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB,
MachineIRBuilder &MIB);
- bool lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
- MachineBasicBlock *SwitchMBB,
- MachineBasicBlock *CurMBB,
- MachineBasicBlock *DefaultMBB,
- MachineIRBuilder &MIB,
- MachineFunction::iterator BBI,
- BranchProbability UnhandledProbs,
- SwitchCG::CaseClusterIt I,
- MachineBasicBlock *Fallthrough,
- bool FallthroughUnreachable);
+  /// Generate code for the BitTest header block, which precedes each sequence
+  /// of BitTestCases.
+ void emitBitTestHeader(SwitchCG::BitTestBlock &BTB,
+ MachineBasicBlock *SwitchMBB);
+  /// Generate code to produce one "bit test" for a given BitTestCase \p B.
+ void emitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext, Register Reg,
+ SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
- bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
- Value *Cond,
+ bool lowerJumpTableWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough, bool FallthroughUnreachable);
+
+ bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond,
MachineBasicBlock *Fallthrough,
bool FallthroughUnreachable,
BranchProbability UnhandledProbs,
@@ -316,6 +372,14 @@
MachineIRBuilder &MIB,
MachineBasicBlock *SwitchMBB);
+ bool lowerBitTestWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability DefaultProb, BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable);
+
bool lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB,
@@ -342,8 +406,6 @@
/// \pre \p U is a return instruction.
bool translateRet(const User &U, MachineIRBuilder &MIRBuilder);
- bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder);
-
bool translateFNeg(const User &U, MachineIRBuilder &MIRBuilder);
bool translateAdd(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -428,6 +490,9 @@
bool translateFAdd(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_FADD, U, MIRBuilder);
}
+ bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
+ }
bool translateFMul(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_FMUL, U, MIRBuilder);
}
@@ -449,6 +514,7 @@
bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder);
bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder);
bool translateFence(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateFreeze(const User &U, MachineIRBuilder &MIRBuilder);
// Stubs to keep the compiler happy while we implement the rest of the
// translation.
@@ -505,6 +571,8 @@
/// Current target configuration. Controls how the pass handles errors.
const TargetPassConfig *TPC;
+ CodeGenOpt::Level OptLevel;
+
/// Current optimization remark emitter. Used to report failures.
std::unique_ptr<OptimizationRemarkEmitter> ORE;
@@ -514,6 +582,10 @@
// function has the optnone attribute.
bool EnableOpts = false;
+ /// True when the block contains a tail call. This allows the IRTranslator to
+ /// stop translating such blocks early.
+ bool HasTailCall = false;
+
/// Switch analysis and optimization.
class GISelSwitchLowering : public SwitchCG::SwitchLowering {
public:
@@ -571,7 +643,7 @@
/// Get the alignment of the given memory operation instruction. This will
/// either be the explicitly specified value or the ABI-required alignment for
/// the type being accessed (according to the Module's DataLayout).
- unsigned getMemOpAlignment(const Instruction &I);
+ Align getMemOpAlign(const Instruction &I);
/// Get the MachineBasicBlock that represents \p BB. Specifically, the block
/// returned will be the head of the translated block (suitable for branch
@@ -600,12 +672,12 @@
BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const;
- void addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- BranchProbability Prob);
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
public:
- // Ctor, nothing fancy.
- IRTranslator();
+ IRTranslator(CodeGenOpt::Level OptLevel = CodeGenOpt::None);
StringRef getPassName() const override { return "IRTranslator"; }
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InlineAsmLowering.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InlineAsmLowering.h
new file mode 100644
index 0000000..ac61848
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InlineAsmLowering.h
@@ -0,0 +1,67 @@
+//===- llvm/CodeGen/GlobalISel/InlineAsmLowering.h --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM inline asm to machine code INLINEASM.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_INLINEASMLOWERING_H
+#define LLVM_CODEGEN_GLOBALISEL_INLINEASMLOWERING_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include <functional>
+
+namespace llvm {
+class CallBase;
+class MachineIRBuilder;
+class MachineOperand;
+class Register;
+class TargetLowering;
+class Value;
+
+class InlineAsmLowering {
+ const TargetLowering *TLI;
+
+ virtual void anchor();
+
+public:
+  /// Lower the given inline asm call instruction.
+  /// \p GetOrCreateVRegs is a callback to materialize the virtual registers
+  /// for the input and output operands of the inline asm.
+  /// \return True if the lowering succeeds, false otherwise.
+ bool lowerInlineAsm(MachineIRBuilder &MIRBuilder, const CallBase &CB,
+ std::function<ArrayRef<Register>(const Value &Val)>
+ GetOrCreateVRegs) const;
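+
+  // Illustrative sketch: an IRTranslator-style caller passes a callback that
+  // maps IR values to their virtual registers (getOrCreateVRegs is an assumed
+  // member of the caller):
+  //
+  //   Lowering.lowerInlineAsm(MIRBuilder, CB, [&](const Value &Val) {
+  //     return getOrCreateVRegs(Val);
+  //   });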
+
+  /// Lower the specified operand into the Ops vector.
+  /// \p Val is the IR input value to be lowered.
+  /// \p Constraint is the user-supplied constraint string.
+  /// \p Ops is the vector to be filled with the lowered operands.
+  /// \return True if the lowering succeeds, false otherwise.
+ virtual bool lowerAsmOperandForConstraint(Value *Val, StringRef Constraint,
+ std::vector<MachineOperand> &Ops,
+ MachineIRBuilder &MIRBuilder) const;
+
+protected:
+ /// Getter for generic TargetLowering class.
+ const TargetLowering *getTLI() const { return TLI; }
+
+ /// Getter for target specific TargetLowering class.
+ template <class XXXTargetLowering> const XXXTargetLowering *getTLI() const {
+ return static_cast<const XXXTargetLowering *>(TLI);
+ }
+
+public:
+ InlineAsmLowering(const TargetLowering *TLI) : TLI(TLI) {}
+ virtual ~InlineAsmLowering() = default;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_GLOBALISEL_INLINEASMLOWERING_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index e4d05a5..bf9991e 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -31,6 +31,7 @@
class APInt;
class APFloat;
+class GISelKnownBits;
class MachineInstr;
class MachineInstrBuilder;
class MachineFunction;
@@ -111,6 +112,14 @@
/// - InsnID - Instruction ID
/// - Expected opcode
GIM_CheckOpcode,
+
+ /// Check the opcode on the specified instruction, checking 2 acceptable
+ /// alternatives.
+ /// - InsnID - Instruction ID
+ /// - Expected opcode
+ /// - Alternative expected opcode
+ GIM_CheckOpcodeIsEither,
+
/// Check the instruction has the right number of operands
/// - InsnID - Instruction ID
/// - Expected number of operands
@@ -138,6 +147,23 @@
/// - MMOIdx - MMO index
/// - Size - The size in bytes of the memory access
GIM_CheckMemorySizeEqualTo,
+
+ /// Check the address space of the memory access for the given machine memory
+ /// operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - NumAddrSpace - Number of valid address spaces
+ /// - AddrSpaceN - An allowed space of the memory access
+ /// - AddrSpaceN+1 ...
+ GIM_CheckMemoryAddressSpace,
+
+ /// Check the minimum alignment of the memory access for the given machine
+ /// memory operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - MinAlign - Minimum acceptable alignment
+ GIM_CheckMemoryAlignment,
+
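+ // Illustrative encodings only (the real tables are TableGen-emitted); a
+ // match against address spaces 1 or 3 plus a 4-byte alignment check would
+ // be laid out as
+ //   GIM_CheckMemoryAddressSpace, /*InsnID*/0, /*MMOIdx*/0,
+ //   /*NumAddrSpace*/2, /*AddrSpace*/1, /*AddrSpace*/3,
+ //   GIM_CheckMemoryAlignment, /*InsnID*/0, /*MMOIdx*/0, /*MinAlign*/4,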
/// Check the size of the memory access for the given machine memory operand
/// against the size of an operand.
/// - InsnID - Instruction ID
@@ -146,6 +172,15 @@
GIM_CheckMemorySizeEqualToLLT,
GIM_CheckMemorySizeLessThanLLT,
GIM_CheckMemorySizeGreaterThanLLT,
+
+ /// Check if this is a vector that can be treated as a vector splat
+ /// constant. This is valid for both G_BUILD_VECTOR as well as
+ /// G_BUILD_VECTOR_TRUNC. For AllOnes, "ones" refers to individual bits,
+ /// so each element must be -1.
+ /// - InsnID - Instruction ID
+ GIM_CheckIsBuildVectorAllOnes,
+ GIM_CheckIsBuildVectorAllZeros,
+
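+ // For illustration (types invented), a splat the AllOnes check accepts;
+ // for G_BUILD_VECTOR_TRUNC only the truncated bits need to be ones:
+ //   %c:_(s32) = G_CONSTANT i32 -1
+ //   %v:_(<4 x s16>) = G_BUILD_VECTOR_TRUNC %c, %c, %c, %c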
/// Check a generic C++ instruction predicate
/// - InsnID - Instruction ID
/// - PredicateID - The ID of the predicate function to call
@@ -191,11 +226,22 @@
/// - Expected Intrinsic ID
GIM_CheckIntrinsicID,
+ /// Check the operand is a specific predicate
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected predicate
+ GIM_CheckCmpPredicate,
+
/// Check the specified operand is an MBB
/// - InsnID - Instruction ID
/// - OpIdx - Operand index
GIM_CheckIsMBB,
+ /// Check the specified operand is an Imm
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ GIM_CheckIsImm,
+
/// Check if the specified operand is safe to fold into the current
/// instruction.
/// - InsnID - Instruction ID
@@ -208,6 +254,15 @@
/// - OtherOpIdx - Other operand index
GIM_CheckIsSameOperand,
+ /// Predicates with 'let PredicateCodeUsesOperands = 1' need to examine some
+ /// named operands that will be recorded in RecordedOperands. Names of these
+ /// operands are referenced in the predicate's argument list. The emitter
+ /// determines StoreIdx (which corresponds to the order in which the names
+ /// appear in the argument list).
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - StoreIdx - Store location in RecordedOperands.
+ GIM_RecordNamedOperand,
+
/// Fail the current try-block, or completely fail to match if there is no
/// current try-block.
GIM_Reject,
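A TableGen-emitted predicate then consumes the recorded operands through the testMIPredicate_MI overload that appears later in this header; a sketch with invented target and predicate names:

    bool MyTargetInstructionSelector::testMIPredicate_MI(
        unsigned PredicateID, const MachineInstr &MI,
        const std::array<const MachineOperand *, 3> &Operands) const {
      switch (PredicateID) {
      case GIPFP_MI_Predicate_isCheapToFold: { // hypothetical predicate
        // Operands[N] holds the N-th named operand, in argument-list order,
        // as stored by GIM_RecordNamedOperand.
        const MachineOperand *Src = Operands[0];
        return Src && Src->isReg();
      }
      }
      llvm_unreachable("Unknown predicate");
    }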
@@ -264,6 +319,13 @@
/// - TempRegFlags - The register flags to set
GIR_AddTempRegister,
+ /// Add a temporary register to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - TempRegID - The temporary register ID to add
+ /// - TempRegFlags - The register flags to set
+ /// - SubRegIndex - The subregister index to set
+ GIR_AddTempSubRegister,
+
/// Add an immediate to the specified instruction
/// - InsnID - Instruction ID to modify
/// - Imm - The immediate to add
@@ -284,6 +346,14 @@
/// - RendererFnID - Custom renderer function to call
GIR_CustomRenderer,
+ /// Render operands to the specified instruction using a custom function,
+ /// reading from a specific operand.
+ /// - InsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to get the matched operand from
+ /// - OpIdx - Operand index in OldInsnID the render function should read from.
+ /// - RendererFnID - Custom renderer function to call
+ GIR_CustomOperandRenderer,
+
/// Render a G_CONSTANT operator as a sign-extended immediate.
/// - NewInsnID - Instruction ID to modify
/// - OldInsnID - Instruction ID to copy from
@@ -355,7 +425,25 @@
/// if returns true:
/// for I in all mutated/inserted instructions:
/// !isPreISelGenericOpcode(I.getOpcode())
- virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0;
+ virtual bool select(MachineInstr &I) = 0;
+
+ CodeGenCoverage *CoverageInfo = nullptr;
+ GISelKnownBits *KnownBits = nullptr;
+ MachineFunction *MF = nullptr;
+
+ virtual void setupGeneratedPerFunctionState(MachineFunction &MF) {
+ llvm_unreachable("TableGen should have emitted implementation");
+ }
+
+ /// Set up per-MF selector state.
+ virtual void setupMF(MachineFunction &mf,
+ GISelKnownBits &KB,
+ CodeGenCoverage &covinfo) {
+ CoverageInfo = &covinfo;
+ KnownBits = &KB;
+ MF = &mf;
+ setupGeneratedPerFunctionState(mf);
+ }
protected:
using ComplexRendererFns =
@@ -367,6 +455,11 @@
std::vector<ComplexRendererFns::value_type> Renderers;
RecordedMIVector MIs;
DenseMap<unsigned, unsigned> TempRegisters;
+ /// Named operands that a predicate with 'let PredicateCodeUsesOperands = 1'
+ /// references in its argument list. Operands are inserted at the index set
+ /// by the emitter, which corresponds to the order in which the names appear
+ /// in the argument list. Currently such predicates don't have more than 3
+ /// arguments.
+ std::array<const MachineOperand *, 3> RecordedOperands;
MatcherState(unsigned MaxRenderers);
};
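The setupMF hook above takes over the per-function state that used to be threaded through select() (note the removed CoverageInfo parameter); roughly how a driver uses it, simplified from the in-tree InstructionSelect pass:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
    #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    #include "llvm/Support/CodeGenCoverage.h"
    #include "llvm/Support/ErrorHandling.h"

    using namespace llvm;

    // Sketch only: the real pass also handles reserved registers, fallback
    // to SelectionDAG, and iterates the blocks bottom-up.
    static void selectFunction(InstructionSelector &ISel, MachineFunction &MF,
                               GISelKnownBits &KB, CodeGenCoverage &Coverage) {
      ISel.setupMF(MF, KB, Coverage); // per-MF state lives on the selector now
      for (MachineBasicBlock &MBB : MF)
        for (MachineInstr &MI : make_early_inc_range(MBB))
          if (isPreISelGenericOpcode(MI.getOpcode()) && !ISel.select(MI))
            report_fatal_error("instruction selection failed");
    }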
@@ -427,7 +520,9 @@
llvm_unreachable(
"Subclasses must override this with a tablegen-erated function");
}
- virtual bool testMIPredicate_MI(unsigned, const MachineInstr &) const {
+ virtual bool testMIPredicate_MI(
+ unsigned, const MachineInstr &,
+ const std::array<const MachineOperand *, 3> &Operands) const {
llvm_unreachable(
"Subclasses must override this with a tablegen-erated function");
}
@@ -445,7 +540,7 @@
bool isOperandImmEqual(const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const;
- /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the
+ /// Return true if the specified operand is a G_PTR_ADD with a G_CONSTANT on the
/// right-hand side. GlobalISel's separation of pointer and integer types
/// means that we don't need to worry about G_OR with equivalent semantics.
bool isBaseWithConstantOffset(const MachineOperand &Root,
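The doc fix above follows the upstream G_GEP to G_PTR_ADD rename; the shape isBaseWithConstantOffset matches, sketched in generic MIR:

    %off:_(s64) = G_CONSTANT i64 16
    %addr:_(p0) = G_PTR_ADD %base:_(p0), %off:_(s64)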
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index e010180..bcb84c3 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -57,6 +58,11 @@
uint64_t CurrentIdx = 0;
SmallVector<uint64_t, 4> OnFailResumeAt;
+ // Bypass the flag check on the instruction, and only look at the MCInstrDesc.
+ bool NoFPException = !State.MIs[0]->getDesc().mayRaiseFPException();
+
+ const uint16_t Flags = State.MIs[0]->getFlags();
+
enum RejectAction { RejectAndGiveUp, RejectAndResume };
auto handleReject = [&]() -> RejectAction {
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
@@ -70,6 +76,19 @@
return RejectAndResume;
};
+ auto propagateFlags = [=](NewMIVector &OutMIs) {
+ for (auto MIB : OutMIs) {
+ // Set the NoFPExcept flag when no original matched instruction could
+ // raise an FP exception, but the new instruction potentially might.
+ uint16_t MIBFlags = Flags;
+ if (NoFPException && MIB->mayRaiseFPException())
+ MIBFlags |= MachineInstr::NoFPExcept;
+ MIB.setMIFlags(MIBFlags);
+ }
+
+ return true;
+ };
+
while (true) {
assert(CurrentIdx != ~0u && "Invalid MatchTable index");
int64_t MatcherOpcode = MatchTable[CurrentIdx++];
@@ -98,7 +117,7 @@
return false;
break;
}
- if (TRI.isPhysicalRegister(MO.getReg())) {
+ if (Register::isPhysicalRegister(MO.getReg())) {
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
dbgs() << CurrentIdx << ": Is a physical register\n");
if (handleReject() == RejectAndGiveUp)
@@ -135,24 +154,31 @@
break;
}
- case GIM_CheckOpcode: {
+ case GIM_CheckOpcode:
+ case GIM_CheckOpcodeIsEither: {
int64_t InsnID = MatchTable[CurrentIdx++];
- int64_t Expected = MatchTable[CurrentIdx++];
+ int64_t Expected0 = MatchTable[CurrentIdx++];
+ int64_t Expected1 = -1;
+ if (MatcherOpcode == GIM_CheckOpcodeIsEither)
+ Expected1 = MatchTable[CurrentIdx++];
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
unsigned Opcode = State.MIs[InsnID]->getOpcode();
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID
- << "], ExpectedOpcode=" << Expected
- << ") // Got=" << Opcode << "\n");
- if (Opcode != Expected) {
+ dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID
+ << "], ExpectedOpcode=" << Expected0;
+ if (MatcherOpcode == GIM_CheckOpcodeIsEither)
+ dbgs() << " || " << Expected1;
+ dbgs() << ") // Got=" << Opcode << "\n";
+ );
+
+ if (Opcode != Expected0 && Opcode != Expected1) {
if (handleReject() == RejectAndGiveUp)
return false;
}
break;
}
-
case GIM_SwitchOpcode: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t LowerBound = MatchTable[CurrentIdx++];
@@ -174,7 +200,7 @@
CurrentIdx = MatchTable[CurrentIdx + (Opcode - LowerBound)];
if (!CurrentIdx) {
CurrentIdx = Default;
- break;
+ break;
}
OnFailResumeAt.push_back(Default);
break;
@@ -302,6 +328,35 @@
return false;
break;
}
+ case GIM_CheckIsBuildVectorAllOnes:
+ case GIM_CheckIsBuildVectorAllZeros: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx
+ << ": GIM_CheckBuildVectorAll{Zeros|Ones}(MIs["
+ << InsnID << "])\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+
+ const MachineInstr *MI = State.MIs[InsnID];
+ assert((MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR ||
+ MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR_TRUNC) &&
+ "Expected G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC");
+
+ if (MatcherOpcode == GIM_CheckIsBuildVectorAllOnes) {
+ if (!isBuildVectorAllOnes(*MI, MRI)) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ }
+ } else {
+ if (!isBuildVectorAllZeros(*MI, MRI)) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ }
+ }
+
+ break;
+ }
case GIM_CheckCxxInsnPredicate: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t Predicate = MatchTable[CurrentIdx++];
@@ -312,7 +367,8 @@
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
assert(Predicate > GIPFP_MI_Invalid && "Expected a valid predicate");
- if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID]))
+ if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID],
+ State.RecordedOperands))
if (handleReject() == RejectAndGiveUp)
return false;
break;
@@ -370,6 +426,69 @@
return false;
break;
}
+ case GIM_CheckMemoryAddressSpace: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t MMOIdx = MatchTable[CurrentIdx++];
+ // This accepts a list of possible address spaces.
+ const int NumAddrSpace = MatchTable[CurrentIdx++];
+
+ if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
+
+ // We still need to jump to the end of the list of address spaces if we
+ // find a match early.
+ const uint64_t LastIdx = CurrentIdx + NumAddrSpace;
+
+ const MachineMemOperand *MMO
+ = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ const unsigned MMOAddrSpace = MMO->getAddrSpace();
+
+ bool Success = false;
+ for (int I = 0; I != NumAddrSpace; ++I) {
+ unsigned AddrSpace = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(
+ TgtInstructionSelector::getName(),
+ dbgs() << "addrspace(" << MMOAddrSpace << ") vs "
+ << AddrSpace << '\n');
+
+ if (AddrSpace == MMOAddrSpace) {
+ Success = true;
+ break;
+ }
+ }
+
+ CurrentIdx = LastIdx;
+ if (!Success && handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
+ case GIM_CheckMemoryAlignment: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t MMOIdx = MatchTable[CurrentIdx++];
+ unsigned MinAlign = MatchTable[CurrentIdx++];
+
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+
+ if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
+
+ MachineMemOperand *MMO
+ = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckMemoryAlignment"
+ << "(MIs[" << InsnID << "]->memoperands() + " << MMOIdx
+ << ")->getAlignment() >= " << MinAlign << ")\n");
+ if (MMO->getAlign() < MinAlign && handleReject() == RejectAndGiveUp)
+ return false;
+
+ break;
+ }
case GIM_CheckMemorySizeEqualTo: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t MMOIdx = MatchTable[CurrentIdx++];
@@ -499,6 +618,20 @@
break;
}
+ case GIM_RecordNamedOperand: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ uint64_t StoreIdx = MatchTable[CurrentIdx++];
+
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_RecordNamedOperand(MIs["
+ << InsnID << "]->getOperand(" << OpIdx
+ << "), StoreIdx=" << StoreIdx << ")\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+ assert(StoreIdx < State.RecordedOperands.size() && "Index out of range");
+ State.RecordedOperands[StoreIdx] = &State.MIs[InsnID]->getOperand(OpIdx);
+ break;
+ }
case GIM_CheckRegBankForClass: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
@@ -510,7 +643,8 @@
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
if (!MO.isReg() ||
- &RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) !=
+ &RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum),
+ MRI.getType(MO.getReg())) !=
RBI.getRegBank(MO.getReg(), MRI, TRI)) {
if (handleReject() == RejectAndGiveUp)
return false;
@@ -577,10 +711,15 @@
<< "), Value=" << Value << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
- if (!MO.isCImm() || !MO.getCImm()->equalsInt(Value)) {
- if (handleReject() == RejectAndGiveUp)
- return false;
- }
+ if (MO.isImm() && MO.getImm() == Value)
+ break;
+
+ if (MO.isCImm() && MO.getCImm()->equalsInt(Value))
+ break;
+
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+
break;
}
@@ -599,7 +738,21 @@
return false;
break;
}
-
+ case GIM_CheckCmpPredicate: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ int64_t Value = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckCmpPredicate(MIs["
+ << InsnID << "]->getOperand(" << OpIdx
+ << "), Value=" << Value << ")\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+ MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+ if (!MO.isPredicate() || MO.getPredicate() != Value)
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
case GIM_CheckIsMBB: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
@@ -613,7 +766,19 @@
}
break;
}
-
+ case GIM_CheckIsImm: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID
+ << "]->getOperand(" << OpIdx << "))\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+ if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ }
+ break;
+ }
case GIM_CheckIsSafeToFold: {
int64_t InsnID = MatchTable[CurrentIdx++];
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
@@ -753,24 +918,35 @@
case GIR_AddRegister: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t RegNum = MatchTable[CurrentIdx++];
+ uint64_t RegFlags = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- OutMIs[InsnID].addReg(RegNum);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs["
- << InsnID << "], " << RegNum << ")\n");
+ OutMIs[InsnID].addReg(RegNum, RegFlags);
+ DEBUG_WITH_TYPE(
+ TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs["
+ << InsnID << "], " << RegNum << ", " << RegFlags << ")\n");
break;
}
- case GIR_AddTempRegister: {
+ case GIR_AddTempRegister:
+ case GIR_AddTempSubRegister: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t TempRegID = MatchTable[CurrentIdx++];
uint64_t TempRegFlags = MatchTable[CurrentIdx++];
+ unsigned SubReg = 0;
+ if (MatcherOpcode == GIR_AddTempSubRegister)
+ SubReg = MatchTable[CurrentIdx++];
+
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- OutMIs[InsnID].addReg(State.TempRegisters[TempRegID], TempRegFlags);
+
+ OutMIs[InsnID].addReg(State.TempRegisters[TempRegID], TempRegFlags, SubReg);
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
dbgs() << CurrentIdx << ": GIR_AddTempRegister(OutMIs["
<< InsnID << "], TempRegisters[" << TempRegID
- << "], " << TempRegFlags << ")\n");
+ << "]";
+ if (SubReg)
+ dbgs() << '.' << TRI.getSubRegIndexName(SubReg);
+ dbgs() << ", " << TempRegFlags << ")\n");
break;
}
@@ -854,8 +1030,27 @@
dbgs() << CurrentIdx << ": GIR_CustomRenderer(OutMIs["
<< InsnID << "], MIs[" << OldInsnID << "], "
<< RendererFnID << ")\n");
+ (ISel.*ISelInfo.CustomRenderers[RendererFnID])(
+ OutMIs[InsnID], *State.MIs[OldInsnID],
+ -1); // Not a source operand of the old instruction.
+ break;
+ }
+ case GIR_CustomOperandRenderer: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OldInsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ int64_t RendererFnID = MatchTable[CurrentIdx++];
+ assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
+
+ DEBUG_WITH_TYPE(
+ TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIR_CustomOperandRenderer(OutMIs["
+ << InsnID << "], MIs[" << OldInsnID << "]->getOperand("
+ << OpIdx << "), "
+ << RendererFnID << ")\n");
(ISel.*ISelInfo.CustomRenderers[RendererFnID])(OutMIs[InsnID],
- *State.MIs[OldInsnID]);
+ *State.MIs[OldInsnID],
+ OpIdx);
break;
}
case GIR_ConstrainOperandRC: {
@@ -939,6 +1134,7 @@
case GIR_Done:
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
dbgs() << CurrentIdx << ": GIR_Done\n");
+ propagateFlags(OutMIs);
return true;
default:
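Distilling the new flag handling above into a standalone rule (illustration only; MatchedFlags and the two booleans come from the matcher state and the MCInstrDesc):

    #include "llvm/CodeGen/MachineInstr.h"

    // Flags of the matched root are copied to every emitted instruction;
    // NoFPExcept is added when the matched instruction could not raise an FP
    // exception but the emitted one might.
    static uint16_t flagsForEmittedMI(uint16_t MatchedFlags,
                                      bool MatchedMayRaiseFPException,
                                      bool EmittedMayRaiseFPException) {
      uint16_t Flags = MatchedFlags;
      if (!MatchedMayRaiseFPException && EmittedMayRaiseFPException)
        Flags |= llvm::MachineInstr::NoFPExcept;
      return Flags;
    }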
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index a22778b..e7bda3b 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -46,11 +46,11 @@
: Builder(B), MRI(MRI), LI(LI) {}
bool tryCombineAnyExt(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
- if (MI.getOpcode() != TargetOpcode::G_ANYEXT)
- return false;
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
- Builder.setInstr(MI);
+ Builder.setInstrAndDebugLoc(MI);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
@@ -59,6 +59,7 @@
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -71,6 +72,7 @@
m_GSExt(m_Reg(ExtSrc)),
m_GZExt(m_Reg(ExtSrc)))))) {
Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *ExtMI, DeadInsts);
return true;
}
@@ -79,102 +81,231 @@
// Can't use MIPattern because we don't have a specific constant in mind.
auto *SrcMI = MRI.getVRegDef(SrcReg);
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
- const LLT &DstTy = MRI.getType(DstReg);
+ const LLT DstTy = MRI.getType(DstReg);
if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
auto &CstVal = SrcMI->getOperand(1);
Builder.buildConstant(
DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *SrcMI, DeadInsts);
return true;
}
}
- return tryFoldImplicitDef(MI, DeadInsts);
+ return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
bool tryCombineZExt(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelObserverWrapper &Observer) {
+ assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
- if (MI.getOpcode() != TargetOpcode::G_ZEXT)
- return false;
-
- Builder.setInstr(MI);
+ Builder.setInstrAndDebugLoc(MI);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
// zext(trunc x) - > and (aext/copy/trunc x), mask
+ // zext(sext x) -> and (sext x), mask
Register TruncSrc;
- if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
+ Register SextSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
+ mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
LLT DstTy = MRI.getType(DstReg);
if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
isConstantUnsupported(DstTy))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
- APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
- auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue());
- Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
- MIBMask);
+ APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
+ auto Mask = Builder.buildConstant(
+ DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
+ auto Extended = SextSrc ? Builder.buildSExtOrTrunc(DstTy, SextSrc) :
+ Builder.buildAnyExtOrTrunc(DstTy, TruncSrc);
+ Builder.buildAnd(DstReg, Extended, Mask);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
+ // zext(zext x) -> (zext x)
+ Register ZextSrc;
+ if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
+ LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ZextSrc);
+ Observer.changedInstr(MI);
+ UpdatedDefs.push_back(DstReg);
+ markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+ return true;
+ }
+
// Try to fold zext(g_constant) when the larger constant type is legal.
// Can't use MIPattern because we don't have a specific constant in mind.
auto *SrcMI = MRI.getVRegDef(SrcReg);
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
- const LLT &DstTy = MRI.getType(DstReg);
+ const LLT DstTy = MRI.getType(DstReg);
if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
auto &CstVal = SrcMI->getOperand(1);
Builder.buildConstant(
DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *SrcMI, DeadInsts);
return true;
}
}
- return tryFoldImplicitDef(MI, DeadInsts);
+ return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
bool tryCombineSExt(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT);
- if (MI.getOpcode() != TargetOpcode::G_SEXT)
- return false;
+ Builder.setInstrAndDebugLoc(MI);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+ // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
+ Register TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
+ LLT DstTy = MRI.getType(DstReg);
+ if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
+ return false;
+ LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
+ LLT SrcTy = MRI.getType(SrcReg);
+ uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
+ Builder.buildInstr(
+ TargetOpcode::G_SEXT_INREG, {DstReg},
+ {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits});
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+ return true;
+ }
+
+ // sext(zext x) -> (zext x)
+ // sext(sext x) -> (sext x)
+ Register ExtSrc;
+ MachineInstr *ExtMI;
+ if (mi_match(SrcReg, MRI,
+ m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
+ m_GSExt(m_Reg(ExtSrc)))))) {
+ LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
+ Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+ UpdatedDefs.push_back(DstReg);
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+ return true;
+ }
+
+ return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
+ }
+
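+ // Before/after sketch of the G_SEXT_INREG form above (arbitrary types; the
+ // previous lowering used a G_SHL/G_ASHR pair instead):
+ //   %1:_(s16) = G_TRUNC %0:_(s32)
+ //   %2:_(s32) = G_SEXT %1
+ // =>
+ //   %t:_(s32) = COPY %0          ; buildAnyExtOrTrunc: no-op at equal width
+ //   %2:_(s32) = G_SEXT_INREG %t, 16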
+ bool tryCombineTrunc(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelObserverWrapper &Observer) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
Builder.setInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
- // sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c
- Register TruncSrc;
- if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
- LLT DstTy = MRI.getType(DstReg);
- // Guess on the RHS shift amount type, which should be re-legalized if
- // applicable.
- if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) ||
- isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) ||
- isConstantUnsupported(DstTy))
+ // Try to fold trunc(g_constant) when the smaller constant type is legal.
+ // Can't use MIPattern because we don't have a specific constant in mind.
+ auto *SrcMI = MRI.getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ const LLT DstTy = MRI.getType(DstReg);
+ if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
+ auto &CstVal = SrcMI->getOperand(1);
+ Builder.buildConstant(
+ DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
+ UpdatedDefs.push_back(DstReg);
+ markInstAndDefDead(MI, *SrcMI, DeadInsts);
+ return true;
+ }
+ }
+
+ // Try to fold trunc(merge) to directly use the source of the merge.
+ // This gets rid of large, difficult-to-legalize merges.
+ if (SrcMI->getOpcode() == TargetOpcode::G_MERGE_VALUES) {
+ const Register MergeSrcReg = SrcMI->getOperand(1).getReg();
+ const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
+ const LLT DstTy = MRI.getType(DstReg);
+
+ // We can only fold if the types are scalar
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
+ if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
return false;
- LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- LLT SrcTy = MRI.getType(SrcReg);
- unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
- auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt);
- auto MIBShl = Builder.buildInstr(
- TargetOpcode::G_SHL, {DstTy},
- {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt});
- Builder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {MIBShl, MIBShAmt});
- markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+
+ if (DstSize < MergeSrcSize) {
+ // When the merge source is larger than the destination, we can just
+ // truncate the merge source directly
+ if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
+ << MI);
+
+ Builder.buildTrunc(DstReg, MergeSrcReg);
+ UpdatedDefs.push_back(DstReg);
+ } else if (DstSize == MergeSrcSize) {
+ // If the sizes match we can simply try to replace the register
+ LLVM_DEBUG(
+ dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
+ << MI);
+ replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
+ Observer);
+ } else if (DstSize % MergeSrcSize == 0) {
+ // If the trunc size is a multiple of the merge source size we can use
+ // a smaller merge instead
+ if (isInstUnsupported(
+ {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
+ << MI);
+
+ const unsigned NumSrcs = DstSize / MergeSrcSize;
+ assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
+ "trunc(merge) should require less inputs than merge");
+ SmallVector<Register, 8> SrcRegs(NumSrcs);
+ for (unsigned i = 0; i < NumSrcs; ++i)
+ SrcRegs[i] = SrcMI->getOperand(i + 1).getReg();
+
+ Builder.buildMerge(DstReg, SrcRegs);
+ UpdatedDefs.push_back(DstReg);
+ } else {
+ // Unable to combine
+ return false;
+ }
+
+ markInstAndDefDead(MI, *SrcMI, DeadInsts);
return true;
}
- return tryFoldImplicitDef(MI, DeadInsts);
+
+ // trunc(trunc) -> trunc
+ Register TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
+ // Always combine trunc(trunc) since the eventual resulting trunc must be
+ // legal anyway as it must be legal for all outputs of the consumer type
+ // set.
+ LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);
+
+ Builder.buildTrunc(DstReg, TruncSrc);
+ UpdatedDefs.push_back(DstReg);
+ markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
+ return true;
+ }
+
+ return false;
}
/// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
bool tryFoldImplicitDef(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
unsigned Opcode = MI.getOpcode();
- if (Opcode != TargetOpcode::G_ANYEXT && Opcode != TargetOpcode::G_ZEXT &&
- Opcode != TargetOpcode::G_SEXT)
- return false;
+ assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
+ Opcode == TargetOpcode::G_SEXT);
if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
MI.getOperand(1).getReg(), MRI)) {
@@ -184,10 +315,11 @@
if (Opcode == TargetOpcode::G_ANYEXT) {
// G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
- if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
+ if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
return false;
LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
+ UpdatedDefs.push_back(DstReg);
} else {
// G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
// bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
@@ -195,6 +327,7 @@
return false;
LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
Builder.buildConstant(DstReg, 0);
+ UpdatedDefs.push_back(DstReg);
}
markInstAndDefDead(MI, *DefMI, DeadInsts);
@@ -203,30 +336,248 @@
return false;
}
- static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) {
- if (OpTy.isVector() && DestTy.isVector())
- return TargetOpcode::G_CONCAT_VECTORS;
+ bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
- if (OpTy.isVector() && !DestTy.isVector())
- return TargetOpcode::G_BUILD_VECTOR;
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
- return TargetOpcode::G_MERGE_VALUES;
- }
+ const unsigned CastOpc = CastMI.getOpcode();
- bool tryCombineMerges(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
-
- if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ if (!isArtifactCast(CastOpc))
return false;
+ const unsigned NumDefs = MI.getNumOperands() - 1;
+
+ const Register CastSrcReg = CastMI.getOperand(1).getReg();
+ const LLT CastSrcTy = MRI.getType(CastSrcReg);
+ const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
+ const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());
+
+ const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
+ const unsigned DestSize = DestTy.getSizeInBits();
+
+ if (CastOpc == TargetOpcode::G_TRUNC) {
+ if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
+ // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
+ // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
+ // =>
+ // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
+ // %2:_(s8) = G_TRUNC %6
+ // %3:_(s8) = G_TRUNC %7
+ // %4:_(s8) = G_TRUNC %8
+ // %5:_(s8) = G_TRUNC %9
+
+ unsigned UnmergeNumElts =
+ DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
+ LLT UnmergeTy = CastSrcTy.changeNumElements(UnmergeNumElts);
+
+ if (isInstUnsupported(
+ {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
+ return false;
+
+ Builder.setInstr(MI);
+ auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);
+
+ for (unsigned I = 0; I != NumDefs; ++I) {
+ Register DefReg = MI.getOperand(I).getReg();
+ UpdatedDefs.push_back(DefReg);
+ Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
+ }
+
+ markInstAndDefDead(MI, CastMI, DeadInsts);
+ return true;
+ }
+
+ if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
+ // %1:_(s16) = G_TRUNC %0(s32)
+ // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
+ // =>
+ // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0
+
+ // Unmerge(trunc) can be combined if the trunc source size is a multiple
+ // of the unmerge destination size
+ if (CastSrcSize % DestSize != 0)
+ return false;
+
+ // Check if the new unmerge is supported
+ if (isInstUnsupported(
+ {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
+ return false;
+
+ // Gather the original destination registers and create new ones for the
+ // unused bits
+ const unsigned NewNumDefs = CastSrcSize / DestSize;
+ SmallVector<Register, 8> DstRegs(NewNumDefs);
+ for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
+ if (Idx < NumDefs)
+ DstRegs[Idx] = MI.getOperand(Idx).getReg();
+ else
+ DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
+ }
+
+ // Build new unmerge
+ Builder.setInstr(MI);
+ Builder.buildUnmerge(DstRegs, CastSrcReg);
+ UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
+ markInstAndDefDead(MI, CastMI, DeadInsts);
+ return true;
+ }
+ }
+
+ // TODO: support combines with other casts as well
+ return false;
+ }
+
+ static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
+ LLT OpTy, LLT DestTy) {
+ // Check if we found a definition that is like G_MERGE_VALUES.
+ switch (MergeOp) {
+ default:
+ return false;
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_MERGE_VALUES:
+ // The convert operation that we will need to insert is
+ // going to convert the input of that type of instruction (scalar)
+ // to the destination type (DestTy).
+ // The conversion needs to stay in the same domain (scalar to scalar
+ // and vector to vector), so if we were to allow to fold the merge
+ // we would need to insert some bitcasts.
+ // E.g.,
+ // <2 x s16> = build_vector s16, s16
+ // <2 x s32> = zext <2 x s16>
+ // <2 x s16>, <2 x s16> = unmerge <2 x s32>
+ //
+ // As-is, the folding would produce:
+ // <2 x s16> = zext s16 <-- scalar to vector
+ // <2 x s16> = zext s16 <-- scalar to vector
+ // Which is invalid.
+ // Instead we would want to generate:
+ // s32 = zext s16
+ // <2 x s16> = bitcast s32
+ // s32 = zext s16
+ // <2 x s16> = bitcast s32
+ //
+ // That is not done yet.
+ if (ConvertOp == 0)
+ return true;
+ return !DestTy.isVector() && OpTy.isVector();
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ if (ConvertOp == 0)
+ return true;
+ if (!DestTy.isVector())
+ return false;
+
+ const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
+
+ // Don't handle scalarization with a cast that isn't in the same
+ // direction as the vector cast. This could be handled, but it would
+ // require more intermediate unmerges.
+ if (ConvertOp == TargetOpcode::G_TRUNC)
+ return DestTy.getSizeInBits() <= OpEltSize;
+ return DestTy.getSizeInBits() >= OpEltSize;
+ }
+ }
+ }
+
+ /// Try to replace DstReg with SrcReg or build a COPY instruction
+ /// depending on the register constraints.
+ static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &Builder,
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelChangeObserver &Observer) {
+ if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
+ Builder.buildCopy(DstReg, SrcReg);
+ UpdatedDefs.push_back(DstReg);
+ return;
+ }
+ SmallVector<MachineInstr *, 4> UseMIs;
+ // Get the users and notify the observer before replacing.
+ for (auto &UseMI : MRI.use_instructions(DstReg)) {
+ UseMIs.push_back(&UseMI);
+ Observer.changingInstr(UseMI);
+ }
+ // Replace the registers.
+ MRI.replaceRegWith(DstReg, SrcReg);
+ UpdatedDefs.push_back(SrcReg);
+ // Notify the observer that we changed the instructions.
+ for (auto *UseMI : UseMIs)
+ Observer.changedInstr(*UseMI);
+ }
+
+ /// Return the operand index in \p MI that defines \p SearchDef
+ static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
+ unsigned DefIdx = 0;
+ for (const MachineOperand &Def : MI.defs()) {
+ if (Def.getReg() == SearchDef)
+ break;
+ ++DefIdx;
+ }
+
+ return DefIdx;
+ }
+
+ bool tryCombineUnmergeValues(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelChangeObserver &Observer) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+
unsigned NumDefs = MI.getNumOperands() - 1;
- MachineInstr *SrcDef =
- getDefIgnoringCopies(MI.getOperand(NumDefs).getReg(), MRI);
+ Register SrcReg = MI.getOperand(NumDefs).getReg();
+ MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
if (!SrcDef)
return false;
LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
+
+ if (SrcDef->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
+ // %0:_(<4 x s16>) = G_FOO
+ // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
+ // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
+ //
+ // =>
+ //
+ // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
+ const unsigned NumSrcOps = SrcDef->getNumOperands();
+ Register SrcUnmergeSrc = SrcDef->getOperand(NumSrcOps - 1).getReg();
+ LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);
+
+ // If we need to decrease the number of vector elements in the result type
+ // of an unmerge, this would involve the creation of an equivalent unmerge
+ // to copy back to the original result registers.
+ LegalizeActionStep ActionStep = LI.getAction(
+ {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
+ switch (ActionStep.Action) {
+ case LegalizeActions::Lower:
+ case LegalizeActions::Unsupported:
+ break;
+ case LegalizeActions::FewerElements:
+ case LegalizeActions::NarrowScalar:
+ if (ActionStep.TypeIdx == 1)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
+
+ // TODO: Should we try to process out the other defs now? If the other
+ // defs of the source unmerge are also unmerged, we end up with a separate
+ // unmerge for each one.
+ unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);
+ for (unsigned I = 0; I != NumDefs; ++I) {
+ Register Def = MI.getOperand(I).getReg();
+ replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
+ MRI, Builder, UpdatedDefs, Observer);
+ }
+
+ markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
+ return true;
+ }
+
MachineInstr *MergeI = SrcDef;
unsigned ConvertOp = 0;
@@ -237,16 +588,17 @@
MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
}
- // FIXME: Handle scalarizing concat_vectors (scalar result type with vector
- // source)
- unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy);
- if (!MergeI || MergeI->getOpcode() != MergingOpcode)
- return false;
+ if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
+ ConvertOp, OpTy, DestTy)) {
+ // We might have a chance to combine later by trying to combine
+ // unmerge(cast) first
+ return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
+ }
const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
if (NumMergeRegs < NumDefs) {
- if (ConvertOp != 0 || NumDefs % NumMergeRegs != 0)
+ if (NumDefs % NumMergeRegs != 0)
return false;
Builder.setInstr(MI);
@@ -259,12 +611,45 @@
const unsigned NewNumDefs = NumDefs / NumMergeRegs;
for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
- SmallVector<Register, 2> DstRegs;
+ SmallVector<Register, 8> DstRegs;
for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
++j, ++DefIdx)
DstRegs.push_back(MI.getOperand(DefIdx).getReg());
- Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
+ if (ConvertOp) {
+ LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+
+ // This is a vector that is being split and casted. Extract to the
+ // element type, and do the conversion on the scalars (or smaller
+ // vectors).
+ LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);
+
+ // Handle split to smaller vectors, with conversions.
+ // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
+ // %3(<8 x s16>) = G_SEXT %2
+ // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
+ //
+ // =>
+ //
+ // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
+ // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
+ // %4(<2 x s16>) = G_SEXT %8
+ // %5(<2 x s16>) = G_SEXT %9
+ // %6(<2 x s16>) = G_SEXT %10
+ // %7(<2 x s16>)= G_SEXT %11
+
+ SmallVector<Register, 4> TmpRegs(NewNumDefs);
+ for (unsigned k = 0; k < NewNumDefs; ++k)
+ TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);
+
+ Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());
+
+ for (unsigned k = 0; k < NewNumDefs; ++k)
+ Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
+ } else {
+ Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
+ }
+ UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
}
} else if (NumMergeRegs > NumDefs) {
@@ -281,36 +666,47 @@
const unsigned NumRegs = NumMergeRegs / NumDefs;
for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
- SmallVector<Register, 2> Regs;
+ SmallVector<Register, 8> Regs;
for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
++j, ++Idx)
Regs.push_back(MergeI->getOperand(Idx).getReg());
- Builder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
+ Register DefReg = MI.getOperand(DefIdx).getReg();
+ Builder.buildMerge(DefReg, Regs);
+ UpdatedDefs.push_back(DefReg);
}
} else {
LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+
+ if (!ConvertOp && DestTy != MergeSrcTy)
+ ConvertOp = TargetOpcode::G_BITCAST;
+
if (ConvertOp) {
Builder.setInstr(MI);
for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
- Builder.buildInstr(ConvertOp, {MI.getOperand(Idx).getReg()},
- {MergeSrc});
+ Register DefReg = MI.getOperand(Idx).getReg();
+ Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
+ UpdatedDefs.push_back(DefReg);
}
markInstAndDefDead(MI, *MergeI, DeadInsts);
return true;
}
- // FIXME: is a COPY appropriate if the types mismatch? We know both
- // registers are allocatable by now.
- if (DestTy != MergeSrcTy)
- return false;
- for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
- MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
- MergeI->getOperand(Idx + 1).getReg());
+ assert(DestTy == MergeSrcTy &&
+ "Bitcast and the other kinds of conversions should "
+ "have happened earlier");
+
+ Builder.setInstr(MI);
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
+ replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
+ Observer);
+ }
}
markInstAndDefDead(MI, *MergeI, DeadInsts);
@@ -329,7 +725,8 @@
}
bool tryCombineExtract(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
// Try to use the source registers from a G_MERGE_VALUES
@@ -344,13 +741,14 @@
// for N >= %2.getSizeInBits() / 2
// %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
- unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
- MachineInstr *MergeI = MRI.getVRegDef(Src);
+ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
return false;
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT SrcTy = MRI.getType(Src);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
// TODO: Do we need to check if the resulting extract is supported?
unsigned ExtractDstSize = DstTy.getSizeInBits();
@@ -368,10 +766,9 @@
// TODO: We could modify MI in place in most cases.
Builder.setInstr(MI);
- Builder.buildExtract(
- MI.getOperand(0).getReg(),
- MergeI->getOperand(MergeSrcIdx + 1).getReg(),
- Offset - MergeSrcIdx * MergeSrcSize);
+ Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
+ Offset - MergeSrcIdx * MergeSrcSize);
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *MergeI, DeadInsts);
return true;
}
@@ -388,55 +785,117 @@
// etc, process the dead instructions now if any.
if (!DeadInsts.empty())
deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
+
+ // Put here every vreg that was redefined in such a way that it's at least
+ // possible that one (or more) of its users (immediate or COPY-separated)
+ // could become artifact combinable with the new definition (or the
+ // instruction reachable from it through a chain of copies if any).
+ SmallVector<Register, 4> UpdatedDefs;
+ bool Changed = false;
switch (MI.getOpcode()) {
default:
return false;
case TargetOpcode::G_ANYEXT:
- return tryCombineAnyExt(MI, DeadInsts);
+ Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
+ break;
case TargetOpcode::G_ZEXT:
- return tryCombineZExt(MI, DeadInsts);
+ Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
+ break;
case TargetOpcode::G_SEXT:
- return tryCombineSExt(MI, DeadInsts);
+ Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
+ break;
case TargetOpcode::G_UNMERGE_VALUES:
- return tryCombineMerges(MI, DeadInsts);
+ Changed =
+ tryCombineUnmergeValues(MI, DeadInsts, UpdatedDefs, WrapperObserver);
+ break;
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ // If any of the users of this merge are an unmerge, then add them to the
+ // artifact worklist in case there's folding that can be done looking up.
+ for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
+ if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
+ U.getOpcode() == TargetOpcode::G_TRUNC) {
+ UpdatedDefs.push_back(MI.getOperand(0).getReg());
+ break;
+ }
+ }
+ break;
case TargetOpcode::G_EXTRACT:
- return tryCombineExtract(MI, DeadInsts);
- case TargetOpcode::G_TRUNC: {
- bool Changed = false;
- for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
- Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
- return Changed;
+ Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
+ break;
+ case TargetOpcode::G_TRUNC:
+ Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
+ if (!Changed) {
+ // Try to combine truncates away even if they are legal. As all artifact
+ // combines at the moment look only "up" the def-use chains, we achieve
+ // that by throwing truncates' users (looking through copies) into the
+ // ArtifactList again.
+ UpdatedDefs.push_back(MI.getOperand(0).getReg());
+ }
+ break;
}
+ // If the main loop through the ArtifactList found at least one combinable
+ // pair of artifacts, not only combine it away (as done above), but also
+ // follow the def-use chain from there to combine everything that can be
+ // combined within this def-use chain of artifacts.
+ while (!UpdatedDefs.empty()) {
+ Register NewDef = UpdatedDefs.pop_back_val();
+ assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
+ for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
+ switch (Use.getOpcode()) {
+ // Keep this list in sync with the list of all artifact combines.
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_TRUNC:
+ // Adding Use to ArtifactList.
+ WrapperObserver.changedInstr(Use);
+ break;
+ case TargetOpcode::COPY: {
+ Register Copy = Use.getOperand(0).getReg();
+ if (Copy.isVirtual())
+ UpdatedDefs.push_back(Copy);
+ break;
+ }
+ default:
+ // If we do not have an artifact combine for the opcode, there is no
+ // point in adding it to the ArtifactList as nothing interesting will
+ // be done to it anyway.
+ break;
+ }
+ }
}
+ return Changed;
}
private:
-
- static unsigned getArtifactSrcReg(const MachineInstr &MI) {
+ static Register getArtifactSrcReg(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case TargetOpcode::COPY:
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
- case TargetOpcode::G_UNMERGE_VALUES:
- return MI.getOperand(MI.getNumOperands() - 1).getReg();
case TargetOpcode::G_EXTRACT:
return MI.getOperand(1).getReg();
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return MI.getOperand(MI.getNumOperands() - 1).getReg();
default:
llvm_unreachable("Not a legalization artifact happen");
}
}
- /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
- /// dead due to MI being killed, then mark DefMI as dead too.
- /// Some of the combines (extends(trunc)), try to walk through redundant
- /// copies in between the extends and the truncs, and this attempts to collect
- /// the in between copies if they're dead.
- void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
- DeadInsts.push_back(&MI);
-
+ /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
+ /// (either by killing it or changing operands) results in DefMI being dead
+ /// too. In-between COPYs or artifact-casts are also collected if they are
+ /// dead.
+ /// MI is not marked dead.
+ void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ unsigned DefIdx = 0) {
// Collect all the copy instructions that are made dead due to deleting
// this instruction. Collect all of them until the Trunc(DefMI).
// E.g.,
@@ -448,7 +907,7 @@
// and as a result, %3, %2, %1 are dead.
MachineInstr *PrevMI = &MI;
while (PrevMI != &DefMI) {
- unsigned PrevRegSrc = getArtifactSrcReg(*PrevMI);
+ Register PrevRegSrc = getArtifactSrcReg(*PrevMI);
MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
if (MRI.hasOneUse(PrevRegSrc)) {
@@ -463,8 +922,39 @@
break;
PrevMI = TmpDef;
}
- if (PrevMI == &DefMI && MRI.hasOneUse(DefMI.getOperand(0).getReg()))
- DeadInsts.push_back(&DefMI);
+
+ if (PrevMI == &DefMI) {
+ unsigned I = 0;
+ bool IsDead = true;
+ for (MachineOperand &Def : DefMI.defs()) {
+ if (I != DefIdx) {
+ if (!MRI.use_empty(Def.getReg())) {
+ IsDead = false;
+ break;
+ }
+ } else {
+ if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
+ break;
+ }
+
+ ++I;
+ }
+
+ if (IsDead)
+ DeadInsts.push_back(&DefMI);
+ }
+ }
+
+ /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
+ /// dead due to MI being killed, then mark DefMI as dead too.
+ /// Some of the combines (extends(trunc)) try to walk through redundant
+ /// copies in between the extends and the truncs, and this attempts to collect
+ /// the in-between copies if they're dead.
+ void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ unsigned DefIdx = 0) {
+ DeadInsts.push_back(&MI);
+ markDefDead(MI, DefMI, DeadInsts, DefIdx);
}
/// Erase the dead instructions in the list and call the observer hooks.
@@ -506,7 +996,7 @@
/// Looks through copy instructions and returns the actual
/// source register.
- unsigned lookThroughCopyInstrs(Register Reg) {
+ Register lookThroughCopyInstrs(Register Reg) {
Register TmpReg;
while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
if (MRI.getType(TmpReg).isValid())
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Legalizer.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Legalizer.h
index 13cf3f7..690e84f 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -26,13 +26,18 @@
namespace llvm {
class MachineRegisterInfo;
+class LostDebugLocObserver;
class Legalizer : public MachineFunctionPass {
public:
static char ID;
-private:
+ struct MFResult {
+ bool Changed;
+ const MachineInstr *FailedOn;
+ };
+private:
/// Initialize the field members using \p MF.
void init(MachineFunction &MF);
@@ -55,14 +60,17 @@
}
MachineFunctionProperties getClearedProperties() const override {
- return MachineFunctionProperties()
- .set(MachineFunctionProperties::Property::NoPHIs);
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
}
- bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII);
-
bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static MFResult
+ legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
+ ArrayRef<GISelChangeObserver *> AuxObservers,
+ LostDebugLocObserver &LocObserver,
+ MachineIRBuilder &MIRBuilder);
};
} // End namespace llvm.
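A sketch of consuming the new static entry point (illustration; MF, LI, and MIRBuilder are assumed to be in scope, and in-tree the caller is Legalizer::runOnMachineFunction):

    LostDebugLocObserver LocObserver("legalizer");
    Legalizer::MFResult Result = Legalizer::legalizeMachineFunction(
        MF, LI, /*AuxObservers=*/{}, LocObserver, MIRBuilder);
    if (Result.FailedOn)
      errs() << "unable to legalize: " << *Result.FailedOn; // report for real
    bool Changed = Result.Changed; // propagate to the pass manager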
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index e8cb65f..2e9c7d8 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -32,9 +32,23 @@
class Legalizer;
class MachineRegisterInfo;
class GISelChangeObserver;
+class TargetLowering;
class LegalizerHelper {
public:
+ /// Expose MIRBuilder so clients can set their own RecordInsertInstruction
+ /// functions
+ MachineIRBuilder &MIRBuilder;
+
+ /// To keep track of changes made by the LegalizerHelper.
+ GISelChangeObserver &Observer;
+
+private:
+ MachineRegisterInfo &MRI;
+ const LegalizerInfo &LI;
+ const TargetLowering &TLI;
+
+public:
enum LegalizeResult {
/// Instruction was already legal and no change was made to the
/// MachineFunction.
@@ -48,6 +62,10 @@
UnableToLegalize,
};
+ /// Expose LegalizerInfo so the clients can re-use.
+ const LegalizerInfo &getLegalizerInfo() const { return LI; }
+ const TargetLowering &getTargetLowering() const { return TLI; }
+
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer,
MachineIRBuilder &B);
LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
@@ -74,6 +92,9 @@
/// precision, ignoring the unused bits).
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+ /// Legalize an instruction by replacing the value type
+ LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+
/// Legalize an instruction by splitting it into simpler parts, hopefully
/// understood by the target.
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
@@ -88,14 +109,13 @@
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy);
- /// Expose MIRBuilder so clients can set their own RecordInsertInstruction
- /// functions
- MachineIRBuilder &MIRBuilder;
+ /// Cast the given value to an LLT::scalar with an equivalent size. Returns
+ /// the register to use if an instruction was inserted. Returns the original
+ /// register if no coercion was necessary.
+ ///
+ /// This may also fail and return Register() if there is no legal way to
+ /// cast.
+ Register coerceToScalar(Register Val);
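+ // Hypothetical use in a lowering step: bitcast a vector value to a single
+ // scalar so it can be handled with scalar ops, bailing out when no legal
+ // cast exists:
+ //   Register AsScalar = coerceToScalar(SrcReg); // e.g. <2 x s32> -> s64
+ //   if (!AsScalar.isValid())
+ //     return UnableToLegalize;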
- /// Expose LegalizerInfo so the clients can re-use.
- const LegalizerInfo &getLegalizerInfo() const { return LI; }
-
-private:
/// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
/// Use by extending the operand's type to \p WideTy using the specified \p
/// ExtOpcode for the extension instruction, and replacing the vreg of the
@@ -129,6 +149,19 @@
/// original vector type, and replacing the vreg of the operand in place.
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx);
+ /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+ /// use by inserting a G_BITCAST to \p CastTy
+ void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx);
+
+ /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+ /// def by inserting a G_BITCAST from \p CastTy
+ void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx);
+
+ /// Widen \p OrigReg to \p WideTy by merging to a wider type, padding with
+ /// G_IMPLICIT_DEF, and producing dead results.
+ Register widenWithUnmerge(LLT WideTy, Register OrigReg);
+
+private:
LegalizeResult
widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
LegalizeResult
@@ -137,6 +170,8 @@
widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
LegalizeResult
widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+ LegalizeResult
+ widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
/// Helper function to split a wide generic register into bitwise blocks with
/// the given Type (which implies the number of blocks needed). The generic
@@ -163,6 +198,44 @@
LLT PartTy, ArrayRef<Register> PartRegs,
LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});
+ /// Unmerge \p SrcReg into smaller sized values, and append them to \p
+ /// Parts. The elements of \p Parts will be the greatest common divisor type
+ /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and
+ /// return the GCD type.
+ LLT extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+ LLT NarrowTy, Register SrcReg);
+
+ /// Unmerge \p SrcReg into \p GCDTy typed registers. This will append all of
+ /// the unpacked registers to \p Parts. This version is if the common unmerge
+ /// type is already known.
+ void extractGCDType(SmallVectorImpl<Register> &Parts, LLT GCDTy,
+ Register SrcReg);
+
+ /// Produce a merge of values in \p VRegs to define \p DstReg. Perform a merge
+ /// from the least common multiple type, and convert as appropriate to \p
+ /// DstReg.
+ ///
+ /// \p VRegs should each have type \p GCDTy. This type should be the greatest
+ /// common divisor type of \p DstReg, \p NarrowTy, and an undetermined source
+ /// type.
+ ///
+ /// \p NarrowTy is the desired result merge source type. If the source value
+ /// needs to be widened to evenly cover \p DstReg, this inserts high bits
+ /// corresponding to the extension opcode \p PadStrategy.
+ ///
+ /// \p VRegs will be cleared, and the result \p NarrowTy register pieces
+ /// will replace it. Returns the complete LCMTy that \p VRegs will cover when
+ /// merged.
+ LLT buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
+ SmallVectorImpl<Register> &VRegs,
+ unsigned PadStrategy = TargetOpcode::G_ANYEXT);
+
+ /// Merge the values in \p RemergeRegs to an \p LCMTy typed value. Extract the
+ /// low bits into \p DstReg. This is intended to use the outputs from
+ /// buildLCMMergePieces after processing.
+ void buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
+ ArrayRef<Register> RemergeRegs);
+
/// Perform generic multiplication of values held in multiple registers.
/// Generated instructions use only types NarrowTy and i1.
/// Destination can be same or two times size of the source.
@@ -170,14 +243,26 @@
ArrayRef<Register> Src1Regs,
ArrayRef<Register> Src2Regs, LLT NarrowTy);
+ void changeOpcode(MachineInstr &MI, unsigned NewOpcode);
+
+public:
+ /// Return the alignment to use for a stack temporary object with the given
+ /// type.
+ Align getStackTemporaryAlignment(LLT Type, Align MinAlign = Align()) const;
+
+ /// Create a stack temporary based on the size in bytes and the alignment
+ MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment,
+ MachinePointerInfo &PtrInfo);
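+ // A sketch of a 16-byte, 16-byte-aligned temporary (illustrative only):
+ //   MachinePointerInfo PtrInfo;
+ //   auto Temp = createStackTemporary(TypeSize::Fixed(16), Align(16), PtrInfo);
+ //   Register Addr = Temp.getReg(0);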
+
+ /// Get a pointer to vector element \p Index located in memory for a vector of
+ /// type \p VecTy starting at a base address of \p VecPtr. If \p Index is out
+ /// of bounds the returned pointer is unspecified, but will be within the
+ /// vector bounds.
+ Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index);
+
LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
unsigned TypeIdx, LLT NarrowTy);
- /// Legalize a simple vector instruction where all operands are the same type
- /// by splitting into multiple components.
- LegalizeResult fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy);
-
/// Legalize an instruction with a vector type where each operand may have a
/// different element type. All type indexes must have the same number of
/// elements.
@@ -199,9 +284,31 @@
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy);
+ LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy);
+ LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+ LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy);
+
LegalizeResult
reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+ /// Legalize an instruction by reducing the operation width, either by
+ /// narrowing the type of the operation or by reducing the number of elements
+ /// of a vector.
+ /// The strategy used (narrow vs. fewerElements) is decided by \p NarrowTy.
+ /// Narrow is used if the scalar types of \p NarrowTy and \p DstTy differ;
+ /// fewerElements is used when the scalar type is the same but the number of
+ /// elements between \p NarrowTy and \p DstTy differs.
+ LegalizeResult reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+
+ LegalizeResult fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
LLT HalfTy, LLT ShiftAmtTy);
@@ -211,28 +318,78 @@
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ /// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
+ LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy);
+
+ /// Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
+ LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy);
+
+ LegalizeResult lowerBitcast(MachineInstr &MI);
+ LegalizeResult lowerLoad(MachineInstr &MI);
+ LegalizeResult lowerStore(MachineInstr &MI);
+ LegalizeResult lowerBitCount(MachineInstr &MI);
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
- LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerUITOFP(MachineInstr &MI);
+ LegalizeResult lowerSITOFP(MachineInstr &MI);
+ LegalizeResult lowerFPTOUI(MachineInstr &MI);
+ LegalizeResult lowerFPTOSI(MachineInstr &MI);
- MachineRegisterInfo &MRI;
- const LegalizerInfo &LI;
- /// To keep track of changes made by the LegalizerHelper.
- GISelChangeObserver &Observer;
+ LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
+ LegalizeResult lowerFPTRUNC(MachineInstr &MI);
+ LegalizeResult lowerFPOWI(MachineInstr &MI);
+
+ LegalizeResult lowerMinMax(MachineInstr &MI);
+ LegalizeResult lowerFCopySign(MachineInstr &MI);
+ LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
+ LegalizeResult lowerFMad(MachineInstr &MI);
+ LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
+ LegalizeResult lowerFFloor(MachineInstr &MI);
+ LegalizeResult lowerMergeValues(MachineInstr &MI);
+ LegalizeResult lowerUnmergeValues(MachineInstr &MI);
+ LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI);
+ LegalizeResult lowerShuffleVector(MachineInstr &MI);
+ LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
+ LegalizeResult lowerExtract(MachineInstr &MI);
+ LegalizeResult lowerInsert(MachineInstr &MI);
+ LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);
+ LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI);
+ LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI);
+ LegalizeResult lowerShlSat(MachineInstr &MI);
+ LegalizeResult lowerBswap(MachineInstr &MI);
+ LegalizeResult lowerBitreverse(MachineInstr &MI);
+ LegalizeResult lowerReadWriteRegister(MachineInstr &MI);
+ LegalizeResult lowerSMULH_UMULH(MachineInstr &MI);
+ LegalizeResult lowerSelect(MachineInstr &MI);
+
};
+/// Helper function that creates a libcall to the given \p Name using the given
+/// calling convention \p CC.
+LegalizerHelper::LegalizeResult
+createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
+ const CallLowering::ArgInfo &Result,
+ ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC);
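+// A hedged example call; "__mulsi3", Dst, Src0, Src1 and Ctx are assumed
+// names for registers and an LLVMContext:
+//   Type *I32 = Type::getInt32Ty(Ctx);
+//   createLibcall(MIRBuilder, "__mulsi3", CallLowering::ArgInfo({Dst}, I32),
+//                 {{Src0, I32}, {Src1, I32}}, CallingConv::C);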
+
/// Helper function that creates the given libcall.
LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
const CallLowering::ArgInfo &Result,
ArrayRef<CallLowering::ArgInfo> Args);
+/// Create a libcall to memcpy et al.
+LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI,
+ MachineInstr &MI);
+
} // End namespace llvm.
#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 513c98f..c8a54b8 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -22,8 +22,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>
@@ -34,6 +35,7 @@
extern cl::opt<bool> DisableGISelLegalityCheck;
+class LegalizerHelper;
class MachineInstr;
class MachineIRBuilder;
class MachineRegisterInfo;
@@ -67,6 +69,9 @@
/// the first two results.
MoreElements,
+ /// Perform the operation on a different, but equivalently sized type.
+ Bitcast,
+
/// The operation itself must be expressed in terms of simpler actions on
/// this target. E.g. a SREM replaced by an SDIV and subtraction.
Lower,
@@ -152,7 +157,7 @@
LLT NewType;
LegalizeActionStep(LegalizeAction Action, unsigned TypeIdx,
- const LLT &NewType)
+ const LLT NewType)
: Action(Action), TypeIdx(TypeIdx), NewType(NewType) {}
bool operator==(const LegalizeActionStep &RHS) const {
@@ -178,7 +183,7 @@
MemSize == Other.MemSize;
}
- /// \returns true if this memory access is legal with for the acecss described
+ /// \returns true if this memory access is legal for the access described
/// by \p Other (The alignment is sufficient for the size and result type).
bool isCompatible(const TypePairAndMemDesc &Other) const {
return Type0 == Other.Type0 && Type1 == Other.Type1 &&
@@ -199,11 +204,33 @@
Predicate all(Predicate P0, Predicate P1, Args... args) {
return all(all(P0, P1), args...);
}
-/// True iff the given type index is the specified types.
+
+/// True iff P0 or P1 are true.
+template<typename Predicate>
+Predicate any(Predicate P0, Predicate P1) {
+ return [=](const LegalityQuery &Query) {
+ return P0(Query) || P1(Query);
+ };
+}
+/// True iff any given predicates are true.
+template<typename Predicate, typename... Args>
+Predicate any(Predicate P0, Predicate P1, Args... args) {
+ return any(any(P0, P1), args...);
+}
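+// For example, in a target's LegalizerInfo constructor (a sketch; S64, S96
+// and S128 are assumed LLT::scalar values, G_MUL via TargetOpcode):
+//   getActionDefinitionsBuilder(G_MUL)
+//       .narrowScalarIf(any(typeIs(0, S96), typeIs(0, S128)),
+//                       LegalizeMutations::changeTo(0, S64));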
+
+/// True iff the given type index is the specified type.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit);
/// True iff the given type index is one of the specified types.
LegalityPredicate typeInSet(unsigned TypeIdx,
std::initializer_list<LLT> TypesInit);
+
+/// True iff the given type index is not the specified type.
+inline LegalityPredicate typeIsNot(unsigned TypeIdx, LLT Type) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx] != Type;
+ };
+}
+
/// True iff the given types for the given pair of type indexes is one of the
/// specified type pairs.
LegalityPredicate
@@ -224,13 +251,16 @@
/// space.
LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace);
+/// True if the type index is a vector with element type \p EltTy.
+LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy);
+
/// True iff the specified type index is a scalar that's narrower than the given
/// size.
-LegalityPredicate narrowerThan(unsigned TypeIdx, unsigned Size);
+LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size);
/// True iff the specified type index is a scalar that's wider than the given
/// size.
-LegalityPredicate widerThan(unsigned TypeIdx, unsigned Size);
+LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size);
/// True iff the specified type index is a scalar or vector with an element type
/// that's narrower than the given size.
@@ -248,8 +278,20 @@
/// is not a power of 2.
LegalityPredicate scalarOrEltSizeNotPow2(unsigned TypeIdx);
+/// True if the total bitwidth of the specified type index is \p Size bits.
+LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size);
+
/// True iff the specified type indices are both the same bit size.
LegalityPredicate sameSize(unsigned TypeIdx0, unsigned TypeIdx1);
+
+/// True iff the first type index has a larger total bit size than the second
+/// type index.
+LegalityPredicate largerThan(unsigned TypeIdx0, unsigned TypeIdx1);
+
+/// True iff the first type index has a smaller total bit size than the second
+/// type index.
+LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1);
+
/// True iff the specified MMO index has a size that is not a power of 2
LegalityPredicate memSizeInBytesNotPow2(unsigned MMOIdx);
/// True iff the specified type index is a vector whose element count is not a
@@ -274,6 +316,11 @@
/// Keep the same scalar or element type as the given type.
LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty);
+/// Change the scalar size or element size to have the same scalar size as type
+/// index \p FromIndex. Unlike changeElementTo, this discards pointer types and
+/// only changes the size.
+LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx);
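+// For instance, paired with a size predicate in a rule set (a sketch):
+//   .widenScalarIf(smallerThan(0, 1), changeElementSizeTo(0, 1))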
+
/// Widen the scalar type or vector element type for the given type index to the
/// next power of 2.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0);
@@ -331,6 +378,8 @@
/// individually handled.
SmallBitVector TypeIdxsCovered{MCOI::OPERAND_LAST_GENERIC -
MCOI::OPERAND_FIRST_GENERIC + 2};
+ SmallBitVector ImmIdxsCovered{MCOI::OPERAND_LAST_GENERIC_IMM -
+ MCOI::OPERAND_FIRST_GENERIC_IMM + 2};
#endif
unsigned typeIdx(unsigned TypeIdx) {
@@ -342,9 +391,21 @@
#endif
return TypeIdx;
}
- void markAllTypeIdxsAsCovered() {
+
+ unsigned immIdx(unsigned ImmIdx) {
+ assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
+ MCOI::OPERAND_FIRST_GENERIC_IMM) &&
+ "Imm Index is out of bounds");
+#ifndef NDEBUG
+ ImmIdxsCovered.set(ImmIdx);
+#endif
+ return ImmIdx;
+ }
+
+ void markAllIdxsAsCovered() {
#ifndef NDEBUG
TypeIdxsCovered.set();
+ ImmIdxsCovered.set();
#endif
}
@@ -403,6 +464,23 @@
return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types),
Mutation);
}
+ /// Use the given action when type index 0 is any type in the given list and
+ /// imm index 0 is anything. Action should not be an action that requires
+ /// mutation.
+ LegalizeRuleSet &actionForTypeWithAnyImm(LegalizeAction Action,
+ std::initializer_list<LLT> Types) {
+ using namespace LegalityPredicates;
+ immIdx(0); // Inform verifier imm idx 0 is handled.
+ return actionIf(Action, typeInSet(typeIdx(0), Types));
+ }
+
+ LegalizeRuleSet &actionForTypeWithAnyImm(
+ LegalizeAction Action, std::initializer_list<std::pair<LLT, LLT>> Types) {
+ using namespace LegalityPredicates;
+ immIdx(0); // Inform verifier imm idx 0 is handled.
+ return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types));
+ }
+
/// Use the given action when type indexes 0 and 1 are both in the given list.
/// That is, the type pair is in the cartesian product of the list.
/// Action should not be an action that requires mutation.
@@ -454,7 +532,7 @@
LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
// We have no choice but to conservatively assume that the free-form
// user-provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Legal, Predicate);
}
/// The instruction is legal when type index 0 is any type in the given list.
@@ -466,6 +544,19 @@
LegalizeRuleSet &legalFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
return actionFor(LegalizeAction::Legal, Types);
}
+ /// The instruction is legal when type index 0 is any type in the given list
+ /// and imm index 0 is anything.
+ LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list<LLT> Types) {
+ markAllIdxsAsCovered();
+ return actionForTypeWithAnyImm(LegalizeAction::Legal, Types);
+ }
+
+ LegalizeRuleSet &legalForTypeWithAnyImm(
+ std::initializer_list<std::pair<LLT, LLT>> Types) {
+ markAllIdxsAsCovered();
+ return actionForTypeWithAnyImm(LegalizeAction::Legal, Types);
+ }
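+ // For example, G_SEXT_INREG takes one type index plus an immediate, so a
+ // target might declare (a sketch; s32 is an assumed LLT::scalar(32)):
+ //   getActionDefinitionsBuilder(G_SEXT_INREG).legalForTypeWithAnyImm({s32});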
+
/// The instruction is legal when type indexes 0 and 1 along with the memory
/// size and minimum alignment is any type and size tuple in the given list.
LegalizeRuleSet &legalForTypesWithMemDesc(
@@ -497,16 +588,25 @@
LegalizeRuleSet &alwaysLegal() {
using namespace LegalizeMutations;
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Legal, always);
}
+ /// The specified type index is coerced if the predicate is true.
+ LegalizeRuleSet &bitcastIf(LegalityPredicate Predicate,
+ LegalizeMutation Mutation) {
+ // We have no choice but to conservatively assume that lowering with a
+ // free-form user provided Predicate properly handles all type indices:
+ markAllIdxsAsCovered();
+ return actionIf(LegalizeAction::Bitcast, Predicate, Mutation);
+ }
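+ // For example, a target could operate on an equivalently sized scalar
+ // instead of a small vector (a sketch; predicates are assumed to come from
+ // the LegalityPredicates namespace):
+ //   .bitcastIf(all(isVector(0), sizeIs(0, 32)),
+ //              [=](const LegalityQuery &Query) {
+ //                return std::make_pair(0u, LLT::scalar(32));
+ //              })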
+
/// The instruction is lowered.
LegalizeRuleSet &lower() {
using namespace LegalizeMutations;
// We have no choice but to conservatively assume that predicate-less lowering
// properly handles all type indices by design:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Lower, always);
}
/// The instruction is lowered if predicate is true. Keep type index 0 as the
@@ -515,7 +615,7 @@
using namespace LegalizeMutations;
// We have no choice but to conservatively assume that lowering with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Lower, Predicate);
}
/// The instruction is lowered if predicate is true.
@@ -523,14 +623,13 @@
LegalizeMutation Mutation) {
// We have no choice but to conservatively assume that lowering with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Lower, Predicate, Mutation);
}
/// The instruction is lowered when type index 0 is any type in the given
/// list. Keep type index 0 as the same type.
LegalizeRuleSet &lowerFor(std::initializer_list<LLT> Types) {
- return actionFor(LegalizeAction::Lower, Types,
- LegalizeMutations::changeTo(0, 0));
+ return actionFor(LegalizeAction::Lower, Types);
}
/// The instruction is lowered when type index 0 is any type in the given
/// list.
@@ -541,8 +640,7 @@
/// The instruction is lowered when type indexes 0 and 1 are any type pair in
/// the given list. Keep type index 0 as the same type.
LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
- return actionFor(LegalizeAction::Lower, Types,
- LegalizeMutations::changeTo(0, 0));
+ return actionFor(LegalizeAction::Lower, Types);
}
/// The instruction is lowered when type indexes 0 and 1 are any type pair in
/// the given list.
@@ -567,11 +665,20 @@
Types2);
}
+ /// The instruction is emitted as a library call.
+ LegalizeRuleSet &libcall() {
+ using namespace LegalizeMutations;
+ // We have no choice but to conservatively assume that predicate-less lowering
+ // properly handles all type indices by design:
+ markAllIdxsAsCovered();
+ return actionIf(LegalizeAction::Libcall, always);
+ }
+
/// Like legalIf, but for the Libcall action.
LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) {
// We have no choice but to conservatively assume that a libcall with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Libcall, Predicate);
}
LegalizeRuleSet &libcallFor(std::initializer_list<LLT> Types) {
@@ -597,7 +704,7 @@
LegalizeMutation Mutation) {
// We have no choice but to conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::WidenScalar, Predicate, Mutation);
}
/// Narrow the scalar to the one selected by the mutation if the predicate is
@@ -606,9 +713,16 @@
LegalizeMutation Mutation) {
// We have no choice but to conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation);
}
+ /// Narrow the scalar, as specified by the mutation, when type indexes 0 and 1
+ /// are any type pair in the given list.
+ LegalizeRuleSet &
+ narrowScalarFor(std::initializer_list<std::pair<LLT, LLT>> Types,
+ LegalizeMutation Mutation) {
+ return actionFor(LegalizeAction::NarrowScalar, Types, Mutation);
+ }
/// Add more elements to reach the type selected by the mutation if the
/// predicate is true.
@@ -616,7 +730,7 @@
LegalizeMutation Mutation) {
// We have no choice but to conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::MoreElements, Predicate, Mutation);
}
/// Remove elements to reach the type selected by the mutation if the
@@ -625,26 +739,36 @@
LegalizeMutation Mutation) {
// We have no choice but to conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::FewerElements, Predicate, Mutation);
}
/// The instruction is unsupported.
LegalizeRuleSet &unsupported() {
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Unsupported, always);
}
LegalizeRuleSet &unsupportedIf(LegalityPredicate Predicate) {
return actionIf(LegalizeAction::Unsupported, Predicate);
}
+
+ LegalizeRuleSet &unsupportedFor(std::initializer_list<LLT> Types) {
+ return actionFor(LegalizeAction::Unsupported, Types);
+ }
+
LegalizeRuleSet &unsupportedIfMemSizeNotPow2() {
return actionIf(LegalizeAction::Unsupported,
LegalityPredicates::memSizeInBytesNotPow2(0));
}
+ LegalizeRuleSet &lowerIfMemSizeNotPow2() {
+ return actionIf(LegalizeAction::Lower,
+ LegalityPredicates::memSizeInBytesNotPow2(0));
+ }
LegalizeRuleSet &customIf(LegalityPredicate Predicate) {
// We have no choice but to conservatively assume that a custom action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Custom, Predicate);
}
LegalizeRuleSet &customFor(std::initializer_list<LLT> Types) {
@@ -703,8 +827,15 @@
LegalizeMutations::scalarize(TypeIdx));
}
+ LegalizeRuleSet &scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx) {
+ using namespace LegalityPredicates;
+ return actionIf(LegalizeAction::FewerElements,
+ all(Predicate, isVector(typeIdx(TypeIdx))),
+ LegalizeMutations::scalarize(TypeIdx));
+ }
+
/// Ensure the scalar or element is at least as wide as Ty.
- LegalizeRuleSet &minScalarOrElt(unsigned TypeIdx, const LLT &Ty) {
+ LegalizeRuleSet &minScalarOrElt(unsigned TypeIdx, const LLT Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
return actionIf(LegalizeAction::WidenScalar,
@@ -714,7 +845,7 @@
/// Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet &minScalarOrEltIf(LegalityPredicate Predicate,
- unsigned TypeIdx, const LLT &Ty) {
+ unsigned TypeIdx, const LLT Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
return actionIf(LegalizeAction::WidenScalar,
@@ -724,16 +855,16 @@
}
/// Ensure the scalar is at least as wide as Ty.
- LegalizeRuleSet &minScalar(unsigned TypeIdx, const LLT &Ty) {
+ LegalizeRuleSet &minScalar(unsigned TypeIdx, const LLT Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
return actionIf(LegalizeAction::WidenScalar,
- narrowerThan(TypeIdx, Ty.getSizeInBits()),
+ scalarNarrowerThan(TypeIdx, Ty.getSizeInBits()),
changeTo(typeIdx(TypeIdx), Ty));
}
/// Ensure the scalar is at most as wide as Ty.
- LegalizeRuleSet &maxScalarOrElt(unsigned TypeIdx, const LLT &Ty) {
+ LegalizeRuleSet &maxScalarOrElt(unsigned TypeIdx, const LLT Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
return actionIf(LegalizeAction::NarrowScalar,
@@ -742,11 +873,11 @@
}
/// Ensure the scalar is at most as wide as Ty.
- LegalizeRuleSet &maxScalar(unsigned TypeIdx, const LLT &Ty) {
+ LegalizeRuleSet &maxScalar(unsigned TypeIdx, const LLT Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
return actionIf(LegalizeAction::NarrowScalar,
- widerThan(TypeIdx, Ty.getSizeInBits()),
+ scalarWiderThan(TypeIdx, Ty.getSizeInBits()),
changeTo(typeIdx(TypeIdx), Ty));
}
@@ -754,27 +885,30 @@
/// For example, when the maximum size of one type depends on the size of
/// another such as extracting N bits from an M bit container.
LegalizeRuleSet &maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx,
- const LLT &Ty) {
+ const LLT Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
return actionIf(
LegalizeAction::NarrowScalar,
[=](const LegalityQuery &Query) {
- return widerThan(TypeIdx, Ty.getSizeInBits()) && Predicate(Query);
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() &&
+ QueryTy.getSizeInBits() > Ty.getSizeInBits() &&
+ Predicate(Query);
},
changeElementTo(typeIdx(TypeIdx), Ty));
}
/// Limit the range of scalar sizes to MinTy and MaxTy.
- LegalizeRuleSet &clampScalar(unsigned TypeIdx, const LLT &MinTy,
- const LLT &MaxTy) {
+ LegalizeRuleSet &clampScalar(unsigned TypeIdx, const LLT MinTy,
+ const LLT MaxTy) {
assert(MinTy.isScalar() && MaxTy.isScalar() && "Expected scalar types");
return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy);
}
/// Limit the range of scalar sizes to MinTy and MaxTy.
- LegalizeRuleSet &clampScalarOrElt(unsigned TypeIdx, const LLT &MinTy,
- const LLT &MaxTy) {
+ LegalizeRuleSet &clampScalarOrElt(unsigned TypeIdx, const LLT MinTy,
+ const LLT MaxTy) {
return minScalarOrElt(TypeIdx, MinTy).maxScalarOrElt(TypeIdx, MaxTy);
}
@@ -786,11 +920,25 @@
return Query.Types[LargeTypeIdx].getScalarSizeInBits() >
Query.Types[TypeIdx].getSizeInBits();
},
+ LegalizeMutations::changeElementSizeTo(TypeIdx, LargeTypeIdx));
+ }
+
+ /// Narrow the scalar to match the size of another.
+ LegalizeRuleSet &maxScalarSameAs(unsigned TypeIdx, unsigned NarrowTypeIdx) {
+ typeIdx(TypeIdx);
+ return narrowScalarIf(
[=](const LegalityQuery &Query) {
- LLT T = Query.Types[LargeTypeIdx];
- return std::make_pair(TypeIdx,
- T.isVector() ? T.getElementType() : T);
- });
+ return Query.Types[NarrowTypeIdx].getScalarSizeInBits() <
+ Query.Types[TypeIdx].getSizeInBits();
+ },
+ LegalizeMutations::changeElementSizeTo(TypeIdx, NarrowTypeIdx));
+ }
+
+ /// Change the type \p TypeIdx to have the same scalar size as type \p
+ /// SameSizeIdx.
+ LegalizeRuleSet &scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx) {
+ return minScalarSameAs(TypeIdx, SameSizeIdx)
+ .maxScalarSameAs(TypeIdx, SameSizeIdx);
}
/// Conditionally widen the scalar or elt to match the size of another.
@@ -820,7 +968,7 @@
}
/// Limit the number of elements in EltTy vectors to at least MinElements.
- LegalizeRuleSet &clampMinNumElements(unsigned TypeIdx, const LLT &EltTy,
+ LegalizeRuleSet &clampMinNumElements(unsigned TypeIdx, const LLT EltTy,
unsigned MinElements) {
// Mark the type index as covered:
typeIdx(TypeIdx);
@@ -838,7 +986,7 @@
});
}
/// Limit the number of elements in EltTy vectors to at most MaxElements.
- LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT &EltTy,
+ LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT EltTy,
unsigned MaxElements) {
// Mark the type index as covered:
typeIdx(TypeIdx);
@@ -861,12 +1009,12 @@
/// No effect if the type is not a vector or does not have the same element
/// type as the constraints.
/// The element type of MinTy and MaxTy must match.
- LegalizeRuleSet &clampNumElements(unsigned TypeIdx, const LLT &MinTy,
- const LLT &MaxTy) {
+ LegalizeRuleSet &clampNumElements(unsigned TypeIdx, const LLT MinTy,
+ const LLT MaxTy) {
assert(MinTy.getElementType() == MaxTy.getElementType() &&
"Expected element types to agree");
- const LLT &EltTy = MinTy.getElementType();
+ const LLT EltTy = MinTy.getElementType();
return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
}
@@ -882,6 +1030,10 @@
/// LegalizeRuleSet in any way at all.
/// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set.
bool verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const;
+ /// Check if there is no imm index which is obviously not handled by the
+ /// LegalizeRuleSet in any way at all.
+ /// \pre Imm indices of the opcode form a dense [0, \p NumImmIdxs) set.
+ bool verifyImmIdxsCoverage(unsigned NumImmIdxs) const;
/// Apply the ruleset to the given LegalityQuery.
LegalizeActionStep apply(const LegalityQuery &Query) const;
@@ -1106,18 +1258,36 @@
bool isLegal(const LegalityQuery &Query) const {
return getAction(Query).Action == LegalizeAction::Legal;
}
+
+ bool isLegalOrCustom(const LegalityQuery &Query) const {
+ auto Action = getAction(Query).Action;
+ return Action == LegalizeAction::Legal || Action == LegalizeAction::Custom;
+ }
+
bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
bool isLegalOrCustom(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
- virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const;
+ /// Called for instructions with the Custom LegalizationAction.
+ virtual bool legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ llvm_unreachable("must implement this if custom action is used");
+ }
/// Return true if MI is either legal or has been legalized and false
/// if not legal.
- virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ virtual bool legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ return true;
+ }
+
+ /// Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while
+ /// widening a constant of type SmallTy which targets can override.
+ /// For example, the DAG does (SmallTy.isByteSized() ? G_SEXT : G_ZEXT), which
+ /// will be the default.
+ virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const;
private:
/// Determine what action should be taken to legalize the given generic
@@ -1141,7 +1311,7 @@
/// {65, NarrowScalar} // bit sizes [65, +inf[
/// });
/// It may be that only 64-bit pointers are supported on your target:
- /// setPointerAction(G_GEP, 0, LLT:pointer(1),
+ /// setPointerAction(G_PTR_ADD, 0, LLT::pointer(1),
/// {{1, Unsupported}, // bit sizes [ 1, 63[
/// {64, Legal}, // bit sizes [64, 65[
/// {65, Unsupported}, // bit sizes [65, +inf[
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Localizer.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Localizer.h
index 06de580..57f6c03 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -42,15 +42,16 @@
static char ID;
private:
+ /// An input function to decide if the pass should run or not
+ /// on the given MachineFunction.
+ std::function<bool(const MachineFunction &)> DoNotRunPass;
+
/// MRI contains all the register class/bank information that this
/// pass uses and updates.
MachineRegisterInfo *MRI;
/// TTI used for getting remat costs for instructions.
TargetTransformInfo *TTI;
- /// Check whether or not \p MI needs to be moved close to its uses.
- bool shouldLocalize(const MachineInstr &MI);
-
/// Check if \p MOUse is used in the same basic block as \p Def.
/// If the use is in the same block, we say it is local.
/// When the use is not local, \p InsertMBB will contain the basic
@@ -72,14 +73,13 @@
public:
Localizer();
+ Localizer(std::function<bool(const MachineFunction &)>);
StringRef getPassName() const override { return "Localizer"; }
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties()
- .set(MachineFunctionProperties::Property::IsSSA)
- .set(MachineFunctionProperties::Property::Legalized)
- .set(MachineFunctionProperties::Property::RegBankSelected);
+ .set(MachineFunctionProperties::Property::IsSSA);
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h
new file mode 100644
index 0000000..cd2a871
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h
@@ -0,0 +1,50 @@
+//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.h -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Tracks DebugLocs between checkpoints and verifies that they are transferred.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H
+#define LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+
+namespace llvm {
+class LostDebugLocObserver : public GISelChangeObserver {
+ StringRef DebugType;
+ SmallSet<DebugLoc, 4> LostDebugLocs;
+ SmallPtrSet<MachineInstr *, 4> PotentialMIsForDebugLocs;
+ unsigned NumLostDebugLocs = 0;
+
+public:
+ LostDebugLocObserver(StringRef DebugType) : DebugType(DebugType) {}
+
+ unsigned getNumLostDebugLocs() const { return NumLostDebugLocs; }
+
+ /// Call this to indicate that it's a good point to assess whether locations
+ /// have been lost. Typically this will be when a logical change has been
+ /// completed such as the caller has finished replacing some instructions with
+ /// alternatives. When CheckDebugLocs is true, the locations will be checked
+ /// to see if any have been lost since the last checkpoint. When
+ /// CheckDebugLocs is false, it will just reset ready for the next checkpoint
+ /// without checking anything. This can be helpful to limit the detection to
+ /// easy-to-fix portions of an algorithm before allowing more difficult ones.
+ void checkpoint(bool CheckDebugLocs = true);
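+ // Typical flow (a sketch; registration with the observer list is elided):
+ //   LostDebugLocObserver Observer("legalizer");
+ //   // ... apply one round of changes while Observer is registered ...
+ //   Observer.checkpoint(); // verify no DebugLocs were dropped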
+
+ void createdInstr(MachineInstr &MI) override;
+ void erasingInstr(MachineInstr &MI) override;
+ void changingInstr(MachineInstr &MI) override;
+ void changedInstr(MachineInstr &MI) override;
+
+private:
+ void analyzeDebugLocations();
+};
+
+} // namespace llvm
+#endif // ifndef LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 13eddd9..55d6d36 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -12,16 +12,15 @@
#ifndef LLVM_GMIR_PATTERNMATCH_H
#define LLVM_GMIR_PATTERNMATCH_H
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/InstrTypes.h"
namespace llvm {
namespace MIPatternMatch {
template <typename Reg, typename Pattern>
-bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P) {
+bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P) {
return P.match(MRI, R);
}
@@ -30,7 +29,7 @@
SubPatternT SubPat;
OneUse_match(const SubPatternT &SP) : SubPat(SP) {}
- bool match(MachineRegisterInfo &MRI, unsigned Reg) {
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg);
}
};
@@ -40,11 +39,25 @@
return SP;
}
+template <typename SubPatternT> struct OneNonDBGUse_match {
+ SubPatternT SubPat;
+ OneNonDBGUse_match(const SubPatternT &SP) : SubPat(SP) {}
+
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ return MRI.hasOneNonDBGUse(Reg) && SubPat.match(MRI, Reg);
+ }
+};
+
+template <typename SubPat>
+inline OneNonDBGUse_match<SubPat> m_OneNonDBGUse(const SubPat &SP) {
+ return SP;
+}
+
struct ConstantMatch {
int64_t &CR;
ConstantMatch(int64_t &C) : CR(C) {}
- bool match(const MachineRegisterInfo &MRI, unsigned Reg) {
- if (auto MaybeCst = getConstantVRegVal(Reg, MRI)) {
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) {
CR = *MaybeCst;
return true;
}
@@ -54,13 +67,36 @@
inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); }
+/// Matcher for a specific constant value.
+struct SpecificConstantMatch {
+ int64_t RequestedVal;
+ SpecificConstantMatch(int64_t RequestedVal) : RequestedVal(RequestedVal) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ int64_t MatchedVal;
+ return mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal;
+ }
+};
+
+/// Matches a constant equal to \p RequestedValue.
+inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) {
+ return SpecificConstantMatch(RequestedValue);
+}
+
+///{
+/// Convenience matchers for specific integer values.
+inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); }
+inline SpecificConstantMatch m_AllOnesInt() {
+ return SpecificConstantMatch(-1);
+}
+///}
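+// A small example (a sketch; Reg and MRI are assumed to be in scope):
+//   bool IsZero = mi_match(Reg, MRI, m_ZeroInt()); // defined by G_CONSTANT 0?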
+
// TODO: Rework this for different kinds of MachineOperand.
// Currently assumes the Src for a match is a register.
// We might want to support taking in some MachineOperands and call getReg on
// that.
struct operand_type_match {
- bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return true; }
+ bool match(const MachineRegisterInfo &MRI, Register Reg) { return true; }
bool match(const MachineRegisterInfo &MRI, MachineOperand *MO) {
return MO->isReg();
}
@@ -71,7 +107,7 @@
/// Matching combinators.
template <typename... Preds> struct And {
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return true;
}
};
@@ -83,14 +119,14 @@
: And<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {
}
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return P.match(MRI, src) && And<Preds...>::match(MRI, src);
}
};
template <typename... Preds> struct Or {
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return false;
}
};
@@ -101,7 +137,7 @@
Or(Pred &&p, Preds &&... preds)
: Or<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {}
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return P.match(MRI, src) || Or<Preds...>::match(MRI, src);
}
};
@@ -123,7 +159,7 @@
template <> struct bind_helper<MachineInstr *> {
static bool bind(const MachineRegisterInfo &MRI, MachineInstr *&MI,
- unsigned Reg) {
+ Register Reg) {
MI = MRI.getVRegDef(Reg);
if (MI)
return true;
@@ -132,7 +168,7 @@
};
template <> struct bind_helper<LLT> {
- static bool bind(const MachineRegisterInfo &MRI, LLT &Ty, unsigned Reg) {
+ static bool bind(const MachineRegisterInfo &MRI, LLT Ty, Register Reg) {
Ty = MRI.getType(Reg);
if (Ty.isValid())
return true;
@@ -142,7 +178,7 @@
template <> struct bind_helper<const ConstantFP *> {
static bool bind(const MachineRegisterInfo &MRI, const ConstantFP *&F,
- unsigned Reg) {
+ Register Reg) {
F = getConstantFPVRegVal(Reg, MRI);
if (F)
return true;
@@ -162,7 +198,9 @@
inline bind_ty<Register> m_Reg(Register &R) { return R; }
inline bind_ty<MachineInstr *> m_MInstr(MachineInstr *&MI) { return MI; }
-inline bind_ty<LLT> m_Type(LLT &Ty) { return Ty; }
+inline bind_ty<LLT> m_Type(LLT Ty) { return Ty; }
+inline bind_ty<CmpInst::Predicate> m_Pred(CmpInst::Predicate &P) { return P; }
+inline operand_type_match m_Pred() { return operand_type_match(); }
// Helper for matching G_FCONSTANT
inline bind_ty<const ConstantFP *> m_GFCst(const ConstantFP *&C) { return C; }
@@ -175,7 +213,8 @@
RHS_P R;
BinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {}
- template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) {
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
MachineInstr *TmpMI;
if (mi_match(Op, MRI, m_MInstr(TmpMI))) {
if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) {
@@ -196,6 +235,12 @@
}
template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, true>
+m_GPtrAdd(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, true>(L, R);
+}
+
+template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB> m_GSub(const LHS &L,
const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB>(L, R);
@@ -232,17 +277,42 @@
}
template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true>
+m_GXor(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true>(L, R);
+}
+
+template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true> m_GOr(const LHS &L,
const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true>(L, R);
}
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SHL, false>
+m_GShl(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_SHL, false>(L, R);
+}
+
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_LSHR, false>
+m_GLShr(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_LSHR, false>(L, R);
+}
+
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>
+m_GAShr(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>(L, R);
+}
+
// Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc
template <typename SrcTy, unsigned Opcode> struct UnaryOp_match {
SrcTy L;
UnaryOp_match(const SrcTy &LHS) : L(LHS) {}
- template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) {
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
MachineInstr *TmpMI;
if (mi_match(Op, MRI, m_MInstr(TmpMI))) {
if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 2) {
@@ -318,18 +388,102 @@
return UnaryOp_match<SrcTy, TargetOpcode::COPY>(std::forward<SrcTy>(Src));
}
+// General helper for generic MI compares, i.e. G_ICMP and G_FCMP
+// TODO: Allow checking a specific predicate.
+template <typename Pred_P, typename LHS_P, typename RHS_P, unsigned Opcode>
+struct CompareOp_match {
+ Pred_P P;
+ LHS_P L;
+ RHS_P R;
+
+ CompareOp_match(const Pred_P &Pred, const LHS_P &LHS, const RHS_P &RHS)
+ : P(Pred), L(LHS), R(RHS) {}
+
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
+ MachineInstr *TmpMI;
+ if (!mi_match(Op, MRI, m_MInstr(TmpMI)) || TmpMI->getOpcode() != Opcode)
+ return false;
+
+ auto TmpPred =
+ static_cast<CmpInst::Predicate>(TmpMI->getOperand(1).getPredicate());
+ if (!P.match(MRI, TmpPred))
+ return false;
+
+ return L.match(MRI, TmpMI->getOperand(2).getReg()) &&
+ R.match(MRI, TmpMI->getOperand(3).getReg());
+ }
+};
+
+template <typename Pred, typename LHS, typename RHS>
+inline CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_ICMP>
+m_GICmp(const Pred &P, const LHS &L, const RHS &R) {
+ return CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_ICMP>(P, L, R);
+}
+
+template <typename Pred, typename LHS, typename RHS>
+inline CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_FCMP>
+m_GFCmp(const Pred &P, const LHS &L, const RHS &R) {
+ return CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_FCMP>(P, L, R);
+}
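+// Example: match an integer equality compare (a sketch; Reg and MRI are
+// assumed to be in scope):
+//   CmpInst::Predicate Pred;
+//   Register LHS, RHS;
+//   bool IsEq =
+//       mi_match(Reg, MRI, m_GICmp(m_Pred(Pred), m_Reg(LHS), m_Reg(RHS))) &&
+//       Pred == CmpInst::ICMP_EQ;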
+
// Helper for checking if a Reg is of specific type.
struct CheckType {
LLT Ty;
- CheckType(const LLT &Ty) : Ty(Ty) {}
+ CheckType(const LLT Ty) : Ty(Ty) {}
- bool match(MachineRegisterInfo &MRI, unsigned Reg) {
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
return MRI.getType(Reg) == Ty;
}
};
inline CheckType m_SpecificType(LLT Ty) { return Ty; }
+template <typename Src0Ty, typename Src1Ty, typename Src2Ty, unsigned Opcode>
+struct TernaryOp_match {
+ Src0Ty Src0;
+ Src1Ty Src1;
+ Src2Ty Src2;
+
+ TernaryOp_match(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
+ : Src0(Src0), Src1(Src1), Src2(Src2) {}
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
+ MachineInstr *TmpMI;
+ if (mi_match(Op, MRI, m_MInstr(TmpMI))) {
+ if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 4) {
+ return (Src0.match(MRI, TmpMI->getOperand(1).getReg()) &&
+ Src1.match(MRI, TmpMI->getOperand(2).getReg()) &&
+ Src2.match(MRI, TmpMI->getOperand(3).getReg()));
+ }
+ }
+ return false;
+ }
+};
+template <typename Src0Ty, typename Src1Ty, typename Src2Ty>
+inline TernaryOp_match<Src0Ty, Src1Ty, Src2Ty,
+ TargetOpcode::G_INSERT_VECTOR_ELT>
+m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) {
+ return TernaryOp_match<Src0Ty, Src1Ty, Src2Ty,
+ TargetOpcode::G_INSERT_VECTOR_ELT>(Src0, Src1, Src2);
+}
+
+/// Matches a register negated by a G_SUB.
+/// G_SUB 0, %negated_reg
+template <typename SrcTy>
+inline BinaryOp_match<SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB>
+m_Neg(const SrcTy &&Src) {
+ return m_GSub(m_ZeroInt(), Src);
+}
+
+/// Matches a register not-ed by a G_XOR.
+/// G_XOR %not_reg, -1
+template <typename SrcTy>
+inline BinaryOp_match<SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true>
+m_Not(const SrcTy &&Src) {
+ return m_GXor(Src, m_AllOnesInt());
+}
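+// For instance (a sketch; DstReg and MRI are assumed to be in scope):
+//   Register NegSrc;
+//   bool IsNeg = mi_match(DstReg, MRI, m_Neg(m_Reg(NegSrc)));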
+
} // namespace MIPatternMatch
} // namespace llvm
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index da3c478..1ab4cd7 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -14,15 +14,14 @@
#define LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
-#include "llvm/CodeGen/GlobalISel/Types.h"
-
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
-
+#include "llvm/IR/Module.h"
namespace llvm {
@@ -37,23 +36,23 @@
/// to transfer BuilderState between different kinds of MachineIRBuilders.
struct MachineIRBuilderState {
/// MachineFunction under construction.
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
/// Information used to access the description of the opcodes.
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
/// Information used to verify types are consistent and to create virtual registers.
- MachineRegisterInfo *MRI;
+ MachineRegisterInfo *MRI = nullptr;
/// Debug location to be set to any instruction we create.
DebugLoc DL;
/// \name Fields describing the insertion point.
/// @{
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
MachineBasicBlock::iterator II;
/// @}
- GISelChangeObserver *Observer;
+ GISelChangeObserver *Observer = nullptr;
- GISelCSEInfo *CSEInfo;
+ GISelCSEInfo *CSEInfo = nullptr;
};
class DstOp {
@@ -68,7 +67,7 @@
DstOp(unsigned R) : Reg(R), Ty(DstType::Ty_Reg) {}
DstOp(Register R) : Reg(R), Ty(DstType::Ty_Reg) {}
DstOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(DstType::Ty_Reg) {}
- DstOp(const LLT &T) : LLTTy(T), Ty(DstType::Ty_LLT) {}
+ DstOp(const LLT T) : LLTTy(T), Ty(DstType::Ty_LLT) {}
DstOp(const TargetRegisterClass *TRC) : RC(TRC), Ty(DstType::Ty_RC) {}
void addDefToMIB(MachineRegisterInfo &MRI, MachineInstrBuilder &MIB) const {
@@ -122,14 +121,22 @@
MachineInstrBuilder SrcMIB;
Register Reg;
CmpInst::Predicate Pred;
+ int64_t Imm;
};
public:
- enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate };
+ enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate, Ty_Imm };
SrcOp(Register R) : Reg(R), Ty(SrcType::Ty_Reg) {}
SrcOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(SrcType::Ty_Reg) {}
SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {}
SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {}
+ /// Use of registers held in unsigned integer variables (or more rarely signed
+ /// integers) is no longer permitted to avoid ambiguity with upcoming support
+ /// for immediates.
+ SrcOp(unsigned) = delete;
+ SrcOp(int) = delete;
+ SrcOp(uint64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {}
+ SrcOp(int64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {}
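+ // With immediate operands a builder call might look like this (a sketch;
+ // B, Dst and Src are assumed names). Note the explicit int64_t: plain
+ // int/unsigned literals hit the deleted constructors above.
+ //   // %dst = G_SEXT_INREG %src, 8
+ //   B.buildInstr(TargetOpcode::G_SEXT_INREG, {Dst}, {Src, int64_t(8)});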
void addSrcToMIB(MachineInstrBuilder &MIB) const {
switch (Ty) {
@@ -142,12 +149,16 @@
case SrcType::Ty_MIB:
MIB.addUse(SrcMIB->getOperand(0).getReg());
break;
+ case SrcType::Ty_Imm:
+ MIB.addImm(Imm);
+ break;
}
}
LLT getLLTTy(const MachineRegisterInfo &MRI) const {
switch (Ty) {
case SrcType::Ty_Predicate:
+ case SrcType::Ty_Imm:
llvm_unreachable("Not a register operand");
case SrcType::Ty_Reg:
return MRI.getType(Reg);
@@ -160,6 +171,7 @@
Register getReg() const {
switch (Ty) {
case SrcType::Ty_Predicate:
+ case SrcType::Ty_Imm:
llvm_unreachable("Not a register operand");
case SrcType::Ty_Reg:
return Reg;
@@ -178,6 +190,15 @@
}
}
+ int64_t getImm() const {
+ switch (Ty) {
+ case SrcType::Ty_Imm:
+ return Imm;
+ default:
+ llvm_unreachable("Not an immediate");
+ }
+ }
+
SrcType getSrcOpKind() const { return Ty; }
private:
@@ -201,21 +222,39 @@
MachineIRBuilderState State;
protected:
- void validateTruncExt(const LLT &Dst, const LLT &Src, bool IsExtend);
+ void validateTruncExt(const LLT Dst, const LLT Src, bool IsExtend);
- void validateBinaryOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
- void validateShiftOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
+ void validateUnaryOp(const LLT Res, const LLT Op0);
+ void validateBinaryOp(const LLT Res, const LLT Op0, const LLT Op1);
+ void validateShiftOp(const LLT Res, const LLT Op0, const LLT Op1);
- void validateSelectOp(const LLT &ResTy, const LLT &TstTy, const LLT &Op0Ty,
- const LLT &Op1Ty);
- void recordInsertion(MachineInstr *MI) const;
+ void validateSelectOp(const LLT ResTy, const LLT TstTy, const LLT Op0Ty,
+ const LLT Op1Ty);
+
+ void recordInsertion(MachineInstr *InsertedInstr) const {
+ if (State.Observer)
+ State.Observer->createdInstr(*InsertedInstr);
+ }
public:
/// Some constructors for easy use.
MachineIRBuilder() = default;
MachineIRBuilder(MachineFunction &MF) { setMF(MF); }
- MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) {
+
+ MachineIRBuilder(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt) {
+ setMF(*MBB.getParent());
+ setInsertPt(MBB, InsPt);
+ }
+
+ MachineIRBuilder(MachineInstr &MI) :
+ MachineIRBuilder(*MI.getParent(), MI.getIterator()) {
setInstr(MI);
+ setDebugLoc(MI.getDebugLoc());
+ }
+
+ MachineIRBuilder(MachineInstr &MI, GISelChangeObserver &Observer) :
+ MachineIRBuilder(MI) {
+ setChangeObserver(Observer);
}
virtual ~MachineIRBuilder() = default;
@@ -272,10 +311,16 @@
/// Set the insertion point before the specified position.
/// \pre MBB must be in getMF().
/// \pre II must be a valid iterator in MBB.
- void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II);
+ void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II) {
+ assert(MBB.getParent() == &getMF() &&
+ "Basic block is in a different function");
+ State.MBB = &MBB;
+ State.II = II;
+ }
+
/// @}
- void setCSEInfo(GISelCSEInfo *Info);
+ void setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; }
/// \name Setters for the insertion point.
/// @{
@@ -284,15 +329,34 @@
/// Set the insertion point to the end of \p MBB.
/// \pre \p MBB must be contained by getMF().
- void setMBB(MachineBasicBlock &MBB);
+ void setMBB(MachineBasicBlock &MBB) {
+ State.MBB = &MBB;
+ State.II = MBB.end();
+ assert(&getMF() == MBB.getParent() &&
+ "Basic block is in a different function");
+ }
/// Set the insertion point to before MI.
/// \pre MI must be in getMF().
- void setInstr(MachineInstr &MI);
+ void setInstr(MachineInstr &MI) {
+ assert(MI.getParent() && "Instruction is not part of a basic block");
+ setMBB(*MI.getParent());
+ State.II = MI.getIterator();
+ }
/// @}
- void setChangeObserver(GISelChangeObserver &Observer);
- void stopObservingChanges();
+ /// Set the insertion point to before MI, and set the debug loc to MI's loc.
+ /// \pre MI must be in getMF().
+ void setInstrAndDebugLoc(MachineInstr &MI) {
+ setInstr(MI);
+ setDebugLoc(MI.getDebugLoc());
+ }
+
+ void setChangeObserver(GISelChangeObserver &Observer) {
+ State.Observer = &Observer;
+ }
+
+ void stopObservingChanges() { State.Observer = nullptr; }
/// @}
/// Set the debug location to \p DL for all the next build instructions.
@@ -308,7 +372,9 @@
/// \pre setBasicBlock or setMI must have been called.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildInstr(unsigned Opcode);
+ MachineInstrBuilder buildInstr(unsigned Opcode) {
+ return insertInstr(buildInstrNoInsert(Opcode));
+ }
/// Build but don't insert <empty> = \p Opcode <empty>.
///
@@ -348,6 +414,17 @@
/// given. Convert "llvm.dbg.label Label" to "DBG_LABEL Label".
MachineInstrBuilder buildDbgLabel(const MDNode *Label);
+ /// Build and insert \p Res = G_DYN_STACKALLOC \p Size, \p Align
+ ///
+ /// G_DYN_STACKALLOC does a dynamic stack allocation and writes the address of
+ /// the allocated memory into \p Res.
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with pointer type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildDynStackAlloc(const DstOp &Res, const SrcOp &Size,
+ Align Alignment);
+
/// Build and insert \p Res = G_FRAME_INDEX \p Idx
///
/// G_FRAME_INDEX materializes the address of an alloca value or other
@@ -371,10 +448,11 @@
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV);
- /// Build and insert \p Res = G_GEP \p Op0, \p Op1
+ /// Build and insert \p Res = G_PTR_ADD \p Op0, \p Op1
///
- /// G_GEP adds \p Op1 bytes to the pointer specified by \p Op0,
- /// storing the resulting pointer in \p Res.
+ /// G_PTR_ADD adds \p Op1 addressable units to the pointer specified by \p Op0,
+ /// storing the resulting pointer in \p Res. Addressable units are typically
+ /// bytes but this can vary between targets.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Res and \p Op0 must be generic virtual registers with pointer
@@ -382,32 +460,38 @@
/// \pre \p Op1 must be a generic virtual register with scalar type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildGEP(const DstOp &Res, const SrcOp &Op0,
- const SrcOp &Op1);
+ MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
+ const SrcOp &Op1);
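+ // A sketch, from client code with builder "B" (BasePtr is an assumed p0
+ // pointer register):
+ //   auto Offset = B.buildConstant(LLT::scalar(64), 16);
+ //   auto NewPtr = B.buildPtrAdd(LLT::pointer(0, 64), BasePtr, Offset);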
- /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value)
+ /// Materialize and insert \p Res = G_PTR_ADD \p Op0, (G_CONSTANT \p Value)
///
- /// G_GEP adds \p Value bytes to the pointer specified by \p Op0,
+ /// G_PTR_ADD adds \p Value bytes to the pointer specified by \p Op0,
/// storing the resulting pointer in \p Res. If \p Value is zero then no
- /// G_GEP or G_CONSTANT will be created and \pre Op0 will be assigned to
+ /// G_PTR_ADD or G_CONSTANT will be created and \p Op0 will be assigned to
/// \p Res.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Op0 must be a generic virtual register with pointer type.
/// \pre \p ValueTy must be a scalar type.
/// \pre \p Res must be 0. This is to detect confusion between
- /// materializeGEP() and buildGEP().
+ /// materializePtrAdd() and buildPtrAdd().
/// \post \p Res will either be a new generic virtual register of the same
/// type as \p Op0 or \p Op0 itself.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- Optional<MachineInstrBuilder> materializeGEP(Register &Res, Register Op0,
- const LLT &ValueTy,
- uint64_t Value);
+ Optional<MachineInstrBuilder> materializePtrAdd(Register &Res, Register Op0,
+ const LLT ValueTy,
+ uint64_t Value);
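A sketch of the zero-offset contract described above (function and parameter names are illustrative assumptions):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: Res must start out as the null register, per the precondition.
static Register addOffset(MachineIRBuilder &B, Register Op0, uint64_t Off) {
  Register Res; // deliberately 0
  // With Off == 0 this returns None, emits nothing, and assigns Res = Op0;
  // otherwise it emits G_CONSTANT + G_PTR_ADD defining a fresh Res.
  B.materializePtrAdd(Res, Op0, LLT::scalar(64), Off);
  return Res;
}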
- /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits
+ /// Build and insert \p Res = G_PTRMASK \p Op0, \p Op1
+ MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_PTRMASK, {Res}, {Op0, Op1});
+ }
+
+ /// Build and insert \p Res = G_PTRMASK \p Op0, G_CONSTANT (1 << NumBits) - 1
///
- /// G_PTR_MASK clears the low bits of a pointer operand without destroying its
+ /// This clears the low bits of a pointer operand without destroying its
/// pointer properties. This has the effect of rounding the address *down* to
/// a specified alignment in bits.
///
@@ -418,8 +502,8 @@
/// be cleared in \p Op0.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0,
- uint32_t NumBits);
+ MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0,
+ uint32_t NumBits);
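For the renamed alignment helper, a sketch that rounds a pointer down to a 16-byte boundary (identifiers are assumptions):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: clear the low log2(16) = 4 bits of Ptr via the G_PTRMASK idiom.
static Register alignDown16(MachineIRBuilder &B, Register Ptr) {
  LLT P0 = LLT::pointer(0, 64); // assumed pointer type
  return B.buildMaskLowPtrBits(P0, Ptr, /*NumBits=*/4).getReg(0);
}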
/// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1
///
@@ -434,7 +518,27 @@
///
/// \return The newly created instruction.
MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut,
- const SrcOp &Op0, const SrcOp &Op1);
+ const SrcOp &Op0, const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1});
+ }
+
+ /// Build and insert \p Res, \p CarryOut = G_USUBO \p Op0, \p Op1
+ MachineInstrBuilder buildUSubo(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_USUBO, {Res, CarryOut}, {Op0, Op1});
+ }
+
+ /// Build and insert \p Res, \p CarryOut = G_SADDO \p Op0, \p Op1
+ MachineInstrBuilder buildSAddo(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_SADDO, {Res, CarryOut}, {Op0, Op1});
+ }
+
+ /// Build and insert \p Res, \p CarryOut = G_SSUBO \p Op0, \p Op1
+ MachineInstrBuilder buildSSubo(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_SSUBO, {Res, CarryOut}, {Op0, Op1});
+ }
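Each of these overflow builders produces a two-result instruction; a sketch of pulling both defs out of a G_UADDO (operand registers assumed in scope):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include <utility>
using namespace llvm;

// Sketch: 32-bit unsigned add yielding both the sum and the carry-out bit.
static std::pair<Register, Register> emitUAddO(MachineIRBuilder &B,
                                               Register LHS, Register RHS) {
  auto AddO = B.buildUAddo(LLT::scalar(32), LLT::scalar(1), LHS, RHS);
  return {AddO.getReg(0), AddO.getReg(1)}; // {Res, CarryOut}
}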
/// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0,
/// \p Op1, \p CarryIn
@@ -452,7 +556,34 @@
/// \return The newly created instruction.
MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut,
const SrcOp &Op0, const SrcOp &Op1,
- const SrcOp &CarryIn);
+ const SrcOp &CarryIn) {
+ return buildInstr(TargetOpcode::G_UADDE, {Res, CarryOut},
+ {Op0, Op1, CarryIn});
+ }
+
+ /// Build and insert \p Res, \p CarryOut = G_USUBE \p Op0, \p Op1, \p CarryIn
+ MachineInstrBuilder buildUSube(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1,
+ const SrcOp &CarryIn) {
+ return buildInstr(TargetOpcode::G_USUBE, {Res, CarryOut},
+ {Op0, Op1, CarryIn});
+ }
+
+ /// Build and insert \p Res, \p CarryOut = G_SADDE \p Op0, \p Op1, \p CarryIn
+ MachineInstrBuilder buildSAdde(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1,
+ const SrcOp &CarryIn) {
+ return buildInstr(TargetOpcode::G_SADDE, {Res, CarryOut},
+ {Op0, Op1, CarryIn});
+ }
+
+ /// Build and insert \p Res, \p CarryOut = G_SSUBE \p Op0, \p Op1, \p CarryIn
+ MachineInstrBuilder buildSSube(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1,
+ const SrcOp &CarryIn) {
+ return buildInstr(TargetOpcode::G_SSUBE, {Res, CarryOut},
+ {Op0, Op1, CarryIn});
+ }
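A sketch of threading the carry through the new carry-using builders to form a 128-bit add from 64-bit legs (leg registers are assumed inputs):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: low halves via G_UADDO, high halves via G_UADDE consuming the carry.
static void emitAdd128(MachineIRBuilder &B, Register ALo, Register AHi,
                       Register BLo, Register BHi) {
  LLT S64 = LLT::scalar(64), S1 = LLT::scalar(1);
  auto Lo = B.buildUAddo(S64, S1, ALo, BLo);
  B.buildUAdde(S64, S1, AHi, BHi, Lo.getReg(1));
}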
/// Build and insert \p Res = G_ANYEXT \p Op0
///
@@ -484,16 +615,38 @@
/// \return The newly created instruction.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
+ /// Build and insert \p Res = G_SEXT_INREG \p Op, ImmOp
+ MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp) {
+ return buildInstr(TargetOpcode::G_SEXT_INREG, {Res}, {Op, SrcOp(ImmOp)});
+ }
+
+ /// Build and insert \p Res = G_FPEXT \p Op
+ MachineInstrBuilder buildFPExt(const DstOp &Res, const SrcOp &Op,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FPEXT, {Res}, {Op}, Flags);
+ }
+
+
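A sketch of the two new extension helpers (value registers are assumed; LLT carries no int/float distinction, so s64 stands in for f64):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: sign-extend the low 8 bits of a 32-bit value in place, then widen
// a 32-bit float to 64 bits.
static void emitExts(MachineIRBuilder &B, Register Val32, Register Flt32) {
  B.buildSExtInReg(LLT::scalar(32), Val32, /*ImmOp=*/8);
  B.buildFPExt(LLT::scalar(64), Flt32);
}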
/// Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src});
}
+ /// Build and insert a G_INTTOPTR instruction.
+ MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_INTTOPTR, {Dst}, {Src});
+ }
+
/// Build and insert \p Dst = G_BITCAST \p Src
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src});
}
+ /// Build and insert \p Dst = G_ADDRSPACE_CAST \p Src
+ MachineInstrBuilder buildAddrSpaceCast(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_ADDRSPACE_CAST, {Dst}, {Src});
+ }
+
/// \return The opcode of the extension the target wants to use for boolean
/// values.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const;
@@ -583,7 +736,7 @@
/// depend on bit 0 (for now).
///
/// \return The newly created instruction.
- MachineInstrBuilder buildBrCond(Register Tst, MachineBasicBlock &Dest);
+ MachineInstrBuilder buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest);
/// Build and insert G_BRINDIRECT \p Tgt
///
@@ -667,7 +820,17 @@
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr,
- MachineMemOperand &MMO);
+ MachineMemOperand &MMO) {
+ return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
+ }
+
+ /// Build and insert a G_LOAD instruction, while constructing the
+ /// MachineMemOperand.
+ MachineInstrBuilder
+ buildLoad(const DstOp &Res, const SrcOp &Addr, MachinePointerInfo PtrInfo,
+ Align Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes());
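A sketch of the MMO-constructing overload; the conservative default-constructed MachinePointerInfo is an assumption here, and real callers typically supply a precise one:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: 4-byte aligned i32 load where the builder synthesizes the
// MachineMemOperand from the pointer info, alignment and flags.
static Register loadI32(MachineIRBuilder &B, Register Addr) {
  return B.buildLoad(LLT::scalar(32), Addr, MachinePointerInfo(), Align(4))
      .getReg(0);
}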
/// Build and insert `Res = <opcode> Addr, MMO`.
///
@@ -681,6 +844,14 @@
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res,
const SrcOp &Addr, MachineMemOperand &MMO);
+ /// Helper to create a load from a constant offset given a base address. Load
+ /// the type of \p Dst from \p Offset from the given base address and memory
+ /// operand.
+ MachineInstrBuilder buildLoadFromOffset(const DstOp &Dst,
+ const SrcOp &BasePtr,
+ MachineMemOperand &BaseMMO,
+ int64_t Offset);
+
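And for the offset helper, a sketch that loads a second field relative to a base pointer, deriving the new memory operand from a caller-provided BaseMMO:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: load the i32 at BasePtr + 4, reusing BaseMMO's attributes.
static Register loadField(MachineIRBuilder &B, Register BasePtr,
                          MachineMemOperand &BaseMMO) {
  return B.buildLoadFromOffset(LLT::scalar(32), BasePtr, BaseMMO, /*Offset=*/4)
      .getReg(0);
}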
/// Build and insert `G_STORE Val, Addr, MMO`.
///
/// Stores the value \p Val to \p Addr.
@@ -693,6 +864,14 @@
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr,
MachineMemOperand &MMO);
+ /// Build and insert a G_STORE instruction, while constructing the
+ /// MachineMemOperand.
+ MachineInstrBuilder
+ buildStore(const SrcOp &Val, const SrcOp &Addr, MachinePointerInfo PtrInfo,
+ Align Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes());
+
/// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`.
///
/// \pre setBasicBlock or setMI must have been called.
@@ -732,6 +911,8 @@
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<Register> Ops);
+ MachineInstrBuilder buildMerge(const DstOp &Res,
+ std::initializer_list<SrcOp> Ops);
/// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op
///
@@ -782,6 +963,23 @@
MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
ArrayRef<Register> Ops);
+ /// Build and insert a vector splat of a scalar \p Src using a
+ /// G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Src must have the same type as the element type of \p Dst
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src);
+
+ /// Build and insert \p Res = G_SHUFFLE_VECTOR \p Src1, \p Src2, \p Mask
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1,
+ const SrcOp &Src2, ArrayRef<int> Mask);
+
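A sketch of the two new vector builders, splatting a scalar and then interleaving two vectors (types and mask are illustrative):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: splat Scalar into <4 x s32>, then interleave the low halves of
// V1 and V2 with an explicit shuffle mask.
static void emitShuffles(MachineIRBuilder &B, Register Scalar, Register V1,
                         Register V2) {
  LLT V4S32 = LLT::vector(4, 32);
  B.buildShuffleSplat(V4S32, Scalar);
  B.buildShuffleVector(V4S32, V1, V2, {0, 4, 1, 5});
}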
/// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
///
/// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more
@@ -796,8 +994,8 @@
MachineInstrBuilder buildConcatVectors(const DstOp &Res,
ArrayRef<Register> Ops);
- MachineInstrBuilder buildInsert(Register Res, Register Src,
- Register Op, unsigned Index);
+ MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src,
+ const SrcOp &Op, unsigned Index);
/// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or
/// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the
@@ -824,7 +1022,8 @@
/// \pre \p Res must be smaller than \p Op
///
/// \return The newly created instruction.
- MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op);
+ MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op,
+ Optional<unsigned> Flags = None);
/// Build and insert \p Res = G_TRUNC \p Op
///
@@ -867,7 +1066,8 @@
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res,
- const SrcOp &Op0, const SrcOp &Op1);
+ const SrcOp &Op0, const SrcOp &Op1,
+ Optional<unsigned> Flags = None);
/// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1
///
@@ -880,7 +1080,8 @@
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst,
- const SrcOp &Op0, const SrcOp &Op1);
+ const SrcOp &Op0, const SrcOp &Op1,
+ Optional<unsigned> Flags = None);
/// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val,
/// \p Elt, \p Idx
@@ -961,8 +1162,8 @@
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMW(unsigned Opcode, Register OldValRes,
- Register Addr, Register Val,
+ MachineInstrBuilder buildAtomicRMW(unsigned Opcode, const DstOp &OldValRes,
+ const SrcOp &Addr, const SrcOp &Val,
MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_XCHG Addr, Val, MMO`.
@@ -1135,9 +1336,24 @@
MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr,
Register Val, MachineMemOperand &MMO);
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_FADD Addr, Val, MMO`.
+ MachineInstrBuilder buildAtomicRMWFAdd(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_FSUB Addr, Val, MMO`.
+ MachineInstrBuilder buildAtomicRMWFSub(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
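A sketch for the new floating-point atomic builders; the MMO is assumed to be constructed by the caller with the proper atomic ordering:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: atomically add Val to *Addr, returning the previous value.
static Register emitAtomicFAdd(MachineIRBuilder &B, Register Addr,
                               Register Val, MachineMemOperand &MMO) {
  return B.buildAtomicRMWFAdd(LLT::scalar(32), Addr, Val, MMO).getReg(0);
}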
/// Build and insert `G_FENCE Ordering, Scope`.
MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope);
+ /// Build and insert \p Dst = G_FREEZE \p Src
+ MachineInstrBuilder buildFreeze(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_FREEZE, {Dst}, {Src});
+ }
+
/// Build and insert \p Res = G_BLOCK_ADDR \p BA
///
/// G_BLOCK_ADDR computes the address of a basic block.
@@ -1210,6 +1426,36 @@
return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
}
+ MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildFMinNum(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMINNUM, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildFMaxNum(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMAXNUM, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMINNUM_IEEE, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMAXNUM_IEEE, {Dst}, {Src0, Src1}, Flags);
+ }
+
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1,
Optional<unsigned> Flags = None) {
@@ -1298,32 +1544,99 @@
return buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, {Dst}, {Src0});
}
+ /// Build and insert \p Dst = G_BSWAP \p Src0
+ MachineInstrBuilder buildBSwap(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_BSWAP, {Dst}, {Src0});
+ }
+
/// Build and insert \p Res = G_FADD \p Op0, \p Op1
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0,
- const SrcOp &Src1) {
- return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1});
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_FSUB \p Op0, \p Op1
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0,
- const SrcOp &Src1) {
- return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1});
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ /// Build and insert \p Res = G_FDIV \p Op0, \p Op1
+ MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FDIV, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2
MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0,
- const SrcOp &Src1, const SrcOp &Src2) {
- return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2});
+ const SrcOp &Src1, const SrcOp &Src2,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2}, Flags);
+ }
+
+ /// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2
+ MachineInstrBuilder buildFMAD(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1, const SrcOp &Src2,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMAD, {Dst}, {Src0, Src1, Src2}, Flags);
}
/// Build and insert \p Res = G_FNEG \p Op0
- MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) {
- return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0});
+ MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags);
}
/// Build and insert \p Res = G_FABS \p Op0
- MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) {
- return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0});
+ MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}, Flags);
+ }
+
+ /// Build and insert \p Dst = G_FCANONICALIZE \p Src0
+ MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FCANONICALIZE, {Dst}, {Src0}, Flags);
+ }
+
+ /// Build and insert \p Dst = G_INTRINSIC_TRUNC \p Src0
+ MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {Dst}, {Src0}, Flags);
+ }
+
+ /// Build and insert \p Res = G_FFLOOR \p Op0
+ MachineInstrBuilder buildFFloor(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FFLOOR, {Dst}, {Src0}, Flags);
+ }
+
+ /// Build and insert \p Dst = G_FLOG \p Src
+ MachineInstrBuilder buildFLog(const DstOp &Dst, const SrcOp &Src,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FLOG, {Dst}, {Src}, Flags);
+ }
+
+ /// Build and insert \p Dst = G_FLOG2 \p Src
+ MachineInstrBuilder buildFLog2(const DstOp &Dst, const SrcOp &Src,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FLOG2, {Dst}, {Src}, Flags);
+ }
+
+ /// Build and insert \p Dst = G_FEXP2 \p Src
+ MachineInstrBuilder buildFExp2(const DstOp &Dst, const SrcOp &Src,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags);
+ }
+
+ /// Build and insert \p Dst = G_FPOW \p Src0, \p Src1
+ MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
@@ -1376,6 +1689,11 @@
return buildInstr(TargetOpcode::G_UMAX, {Dst}, {Src0, Src1});
}
+ /// Build and insert \p Dst = G_ABS \p Src
+ MachineInstrBuilder buildAbs(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_ABS, {Dst}, {Src});
+ }
+
/// Build and insert \p Res = G_JUMP_TABLE \p JTI
///
/// G_JUMP_TABLE sets \p Res to the address of the jump table specified by
@@ -1384,6 +1702,101 @@
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildJumpTable(const LLT PtrTy, unsigned JTI);
+ /// Build and insert \p Res = G_VECREDUCE_SEQ_FADD \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to start the sequential
+ /// reduction operation of \p VecIn.
+ MachineInstrBuilder buildVecReduceSeqFAdd(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FADD, {Dst},
+ {ScalarIn, {VecIn}});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_SEQ_FMUL \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to start the sequential
+ /// reduction operation of \p VecIn.
+ MachineInstrBuilder buildVecReduceSeqFMul(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FMUL, {Dst},
+ {ScalarIn, {VecIn}});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FADD \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to the reduction operation of
+ /// \p VecIn.
+ MachineInstrBuilder buildVecReduceFAdd(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FADD, {Dst}, {ScalarIn, VecIn});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FMUL \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to the reduction operation of
+ /// \p VecIn.
+ MachineInstrBuilder buildVecReduceFMul(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FMUL, {Dst}, {ScalarIn, VecIn});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FMAX \p Src
+ MachineInstrBuilder buildVecReduceFMax(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FMAX, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FMIN \p Src
+ MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FMIN, {Dst}, {Src});
+ }
+ /// Build and insert \p Res = G_VECREDUCE_ADD \p Src
+ MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_MUL \p Src
+ MachineInstrBuilder buildVecReduceMul(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_MUL, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_AND \p Src
+ MachineInstrBuilder buildVecReduceAnd(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_AND, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_OR \p Src
+ MachineInstrBuilder buildVecReduceOr(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_OR, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_XOR \p Src
+ MachineInstrBuilder buildVecReduceXor(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_XOR, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_SMAX \p Src
+ MachineInstrBuilder buildVecReduceSMax(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SMAX, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_SMIN \p Src
+ MachineInstrBuilder buildVecReduceSMin(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SMIN, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_UMAX \p Src
+ MachineInstrBuilder buildVecReduceUMax(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_UMAX, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_UMIN \p Src
+ MachineInstrBuilder buildVecReduceUMin(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_UMIN, {Dst}, {Src});
+ }
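A sketch contrasting a plain integer reduction with a sequential FP one, which threads the scalar accumulator through as its first input (registers assumed):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Sketch: horizontal i32 sum of Vec, then an in-order fadd reduction of FVec
// started from the accumulator Acc.
static void emitReductions(MachineIRBuilder &B, Register Vec, Register Acc,
                           Register FVec) {
  LLT S32 = LLT::scalar(32);
  B.buildVecReduceAdd(S32, Vec);
  B.buildVecReduceSeqFAdd(S32, Acc, FVec);
}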
virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
ArrayRef<SrcOp> SrcOps,
Optional<unsigned> Flags = None);
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index e84b1c3..da78540 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
#include <initializer_list>
#include <memory>
@@ -103,36 +104,37 @@
/// Currently the TableGen-like file would look like:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first vec elt.
- /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR},
- /// /*<2x32-bit> vadd {0, 64, VPR}
+ /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first
+ /// // vec elt.
+ /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR},
+ /// /*<2x32-bit> vadd*/ {0, 64, VPR}
/// }; // PartialMapping duplicated.
///
/// ValueMapping[] {
- /// /*plain 32-bit add*/ {&PartialMapping[0], 1},
+ /// /*plain 32-bit add*/ {&PartialMapping[0], 1},
/// /*expanded vadd on 2xadd*/ {&PartialMapping[1], 2},
- /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1}
+ /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1}
/// };
/// \endcode
///
/// With the array of pointer, we would have:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add lower */ {0, 32, GPR},
+ /// /*32-bit add lower */ { 0, 32, GPR},
/// /*32-bit add upper */ {32, 32, GPR},
- /// /*<2x32-bit> vadd {0, 64, VPR}
+ /// /*<2x32-bit> vadd */ { 0, 64, VPR}
/// }; // No more duplication.
///
/// BreakDowns[] = {
- /// /*AddBreakDown*/ &PartialMapping[0],
+ /// /*AddBreakDown*/ &PartialMapping[0],
/// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[1],
- /// /*VAddBreakDown*/ &PartialMapping[2]
+ /// /*VAddBreakDown*/ &PartialMapping[2]
/// }; // Addresses of PartialMapping duplicated (smaller).
///
/// ValueMapping[] {
- /// /*plain 32-bit add*/ {&BreakDowns[0], 1},
+ /// /*plain 32-bit add*/ {&BreakDowns[0], 1},
/// /*expanded vadd on 2xadd*/ {&BreakDowns[1], 2},
- /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1}
+ /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1}
/// };
/// \endcode
///
@@ -543,7 +545,7 @@
const RegisterBank *
getRegBankFromConstraints(const MachineInstr &MI, unsigned OpIdx,
const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) const;
+ const MachineRegisterInfo &MRI) const;
/// Helper method to apply something that is like the default mapping.
/// Basically, that means that \p OpdMapper.getMI() is left untouched
@@ -599,7 +601,7 @@
///
/// \todo This should be TableGen'ed.
virtual const RegisterBank &
- getRegBankFromRegClass(const TargetRegisterClass &RC) const {
+ getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const {
llvm_unreachable("The target must override this method");
}
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Types.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Types.h
deleted file mode 100644
index 4fd7043..0000000
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file describes high level types that are used by several passes or
-/// APIs involved in the GlobalISel pipeline.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_GLOBALISEL_TYPES_H
-#define LLVM_CODEGEN_GLOBALISEL_TYPES_H
-
-#include "llvm/ADT/DenseMap.h"
-
-namespace llvm {
-
-class Value;
-
-/// Map a value to a virtual register.
-/// For now, we chose to map aggregate types to on single virtual
-/// register. This might be revisited if it turns out to be inefficient.
-/// PR26161 tracks that.
-/// Note: We need to expose this type to the target hooks for thing like
-/// ABI lowering that would be used during IRTranslation.
-using ValueToVReg = DenseMap<const Value *, unsigned>;
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_GLOBALISEL_TYPES_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Utils.h b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Utils.h
index 6946aad..9bd5180 100644
--- a/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/linux-x64/clang/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -16,23 +16,28 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include <cstdint>
namespace llvm {
class AnalysisUsage;
+class GISelKnownBits;
class MachineFunction;
class MachineInstr;
class MachineOperand;
class MachineOptimizationRemarkEmitter;
class MachineOptimizationRemarkMissed;
+struct MachinePointerInfo;
class MachineRegisterInfo;
class MCInstrDesc;
class RegisterBankInfo;
class TargetInstrInfo;
+class TargetLowering;
class TargetPassConfig;
class TargetRegisterInfo;
class TargetRegisterClass;
-class Twine;
class ConstantFP;
class APFloat;
@@ -40,9 +45,9 @@
/// create a new virtual register in the correct class.
///
/// \return The virtual register constrained to the right register class.
-unsigned constrainRegToClass(MachineRegisterInfo &MRI,
+Register constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
- const RegisterBankInfo &RBI, unsigned Reg,
+ const RegisterBankInfo &RBI, Register Reg,
const TargetRegisterClass &RegClass);
/// Constrain the Register operand OpIdx, so that it is now constrained to the
@@ -52,14 +57,14 @@
/// definition. The debug location of \p InsertPt is used for the new copy.
///
/// \return The virtual register constrained to the right register class.
-unsigned constrainOperandRegClass(const MachineFunction &MF,
+Register constrainOperandRegClass(const MachineFunction &MF,
const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
const RegisterBankInfo &RBI,
MachineInstr &InsertPt,
const TargetRegisterClass &RegClass,
- const MachineOperand &RegMO, unsigned OpIdx);
+ const MachineOperand &RegMO);
/// Try to constrain Reg so that it is usable by argument OpIdx of the
/// provided MCInstrDesc \p II. If this fails, create a new virtual
@@ -70,7 +75,7 @@
/// InsertPt is used for the new copy.
///
/// \return The virtual register constrained to the right register class.
-unsigned constrainOperandRegClass(const MachineFunction &MF,
+Register constrainOperandRegClass(const MachineFunction &MF,
const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
@@ -91,6 +96,11 @@
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI);
+
+/// Check if DstReg can be replaced with SrcReg depending on the register
+/// constraints.
+bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI);
+
/// Check whether an instruction \p MI is dead: it only defines dead virtual
/// registers, and doesn't have other side effects.
bool isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI);
@@ -106,26 +116,39 @@
const char *PassName, StringRef Msg,
const MachineInstr &MI);
+/// Report an ISel warning as a missed optimization remark to the LLVMContext's
+/// diagnostic stream.
+void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R);
+
+/// If \p VReg is defined by a G_CONSTANT, return the corresponding value.
+Optional<APInt> getConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI);
+
/// If \p VReg is defined by a G_CONSTANT whose value fits in int64_t,
/// returns it.
-Optional<int64_t> getConstantVRegVal(unsigned VReg,
- const MachineRegisterInfo &MRI);
+Optional<int64_t> getConstantVRegSExtVal(Register VReg,
+ const MachineRegisterInfo &MRI);
+
/// Simple struct used to hold a constant integer value and a virtual
/// register.
struct ValueAndVReg {
- int64_t Value;
- unsigned VReg;
+ APInt Value;
+ Register VReg;
};
/// If \p VReg is defined by a statically evaluable chain of
-/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true)
-/// and that constant fits in int64_t, returns its value as well as
-/// the virtual register defined by this G_CONSTANT.
-/// When \p LookThroughInstrs == false, this function behaves like
+/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true)
+/// and that constant fits in int64_t, returns its value as well as the
+/// virtual register defined by this G_F/CONSTANT.
+/// When \p LookThroughInstrs == false this function behaves like
/// getConstantVRegVal.
+/// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
Optional<ValueAndVReg>
-getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI,
- bool LookThroughInstrs = true);
-const ConstantFP* getConstantFPVRegVal(unsigned VReg,
+getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true,
+ bool HandleFConstants = true);
+const ConstantFP* getConstantFPVRegVal(Register VReg,
const MachineRegisterInfo &MRI);
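A sketch of the widened constant query, which now hands back an APInt and can look through G_FCONSTANT as well (the predicate is an illustrative assumption):

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

// Sketch: match a constant feeding VReg, looking through intervening copies
// and extensions, and test a property of its value.
static bool isPowerOfTwoConst(Register VReg, const MachineRegisterInfo &MRI) {
  if (Optional<ValueAndVReg> C = getConstantVRegValWithLookThrough(VReg, MRI))
    return C->Value.isPowerOf2();
  return false;
}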
/// See if Reg is defined by a single def instruction that is
@@ -134,12 +157,30 @@
MachineInstr *getOpcodeDef(unsigned Opcode, Register Reg,
const MachineRegisterInfo &MRI);
-/// Find the def instruction for \p Reg, folding away any trivial copies. Note
-/// it may still return a COPY, if it changes the type. May return nullptr if \p
-/// Reg is not a generic virtual register.
+/// Simple struct used to hold a Register value and the instruction which
+/// defines it.
+struct DefinitionAndSourceRegister {
+ MachineInstr *MI;
+ Register Reg;
+};
+
+/// Find the def instruction for \p Reg and the underlying source Register,
+/// folding away any copies.
+Optional<DefinitionAndSourceRegister>
+getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI);
+
+/// Find the def instruction for \p Reg, folding away any trivial copies. May
+/// return nullptr if \p Reg is not a generic virtual register.
MachineInstr *getDefIgnoringCopies(Register Reg,
const MachineRegisterInfo &MRI);
+/// Find the source register for \p Reg, folding away any trivial copies. It
+/// will be an output register of the instruction that getDefIgnoringCopies
+/// returns. May return an invalid register if \p Reg is not a generic virtual
+/// register.
+Register getSrcRegIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI);
+
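A sketch of the new copy-folding query (the opcode check is an illustrative assumption):

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Sketch: look through trivial copies and test the real defining opcode.
static bool isDefinedByImplicitDef(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  if (Optional<DefinitionAndSourceRegister> DefSrc =
          getDefSrcRegIgnoringCopies(Reg, MRI))
    return DefSrc->MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
  return false;
}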
/// Returns an APFloat from Val converted to the appropriate size.
APFloat getAPFloatFromSize(double Val, unsigned Size);
@@ -147,8 +188,90 @@
/// fallback.
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU);
-Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
- const unsigned Op2,
+Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const Register Op1,
+ const Register Op2,
const MachineRegisterInfo &MRI);
+
+Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
+ uint64_t Imm, const MachineRegisterInfo &MRI);
+
+/// Test if the given value is known to have exactly one bit set. This differs
+/// from computeKnownBits in that it doesn't necessarily determine which bit is
+/// set.
+bool isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KnownBits = nullptr);
+
+/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
+/// this returns if \p Val can be assumed to never be a signaling NaN.
+bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ bool SNaN = false);
+
+/// Returns true if \p Val can be assumed to never be a signaling NaN.
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
+ return isKnownNeverNaN(Val, MRI, true);
+}
+
+Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO);
+
+/// Return a virtual register corresponding to the incoming argument register \p
+/// PhysReg. This register is expected to have class \p RC, and optional type \p
+/// RegTy. This assumes all references to the register will use the same type.
+///
+/// If there is an existing live-in argument register, it will be returned.
+/// This will also ensure there is a valid copy.
+Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII,
+ MCRegister PhysReg,
+ const TargetRegisterClass &RC,
+ LLT RegTy = LLT());
+
+/// Return the least common multiple type of \p OrigTy and \p TargetTy, by changing the
+/// number of vector elements or scalar bitwidth. The intent is a
+/// G_MERGE_VALUES, G_BUILD_VECTOR, or G_CONCAT_VECTORS can be constructed from
+/// \p OrigTy elements, and unmerged into \p TargetTy
+LLVM_READNONE
+LLT getLCMType(LLT OrigTy, LLT TargetTy);
+
+/// Return a type where the total size is the greatest common divisor of \p
+/// OrigTy and \p TargetTy. This will try to either change the number of vector
+/// elements, or bitwidth of scalars. The intent is the result type can be used
+/// as the result of a G_UNMERGE_VALUES from \p OrigTy, and then some
+/// combination of G_MERGE_VALUES, G_BUILD_VECTOR and G_CONCAT_VECTORS (possibly
+/// with intermediate casts) can re-form \p TargetTy.
+///
+/// If these are vectors with different element types, this will try to produce
+/// a vector with a compatible total size, but the element type of \p OrigTy. If
+/// this can't be satisfied, this will produce a scalar smaller than the
+/// original vector elements.
+///
+/// In the worst case, this returns LLT::scalar(1)
+LLVM_READNONE
+LLT getGCDType(LLT OrigTy, LLT TargetTy);
+
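A worked sketch of the two type helpers; the concrete results below follow from the contracts above as this editor reads them, e.g. for <3 x s32> against <2 x s32>:

#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

// Sketch: the LCM of the total sizes (96 and 64 bits) yields <6 x s32>;
// the GCD yields the common 32-bit scalar element.
static void typeHelpers() {
  LLT A = LLT::vector(3, 32), T = LLT::vector(2, 32);
  LLT Lcm = getLCMType(A, T); // expected: <6 x s32>
  LLT Gcd = getGCDType(A, T); // expected: s32
  (void)Lcm; (void)Gcd;
}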
+/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
+/// If \p MI is not a splat, returns None.
+Optional<int> getSplatIndex(MachineInstr &MI);
+
+/// Returns a scalar constant of a G_BUILD_VECTOR splat if it exists.
+Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Return true if the specified instruction is a G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef.
+bool isBuildVectorAllZeros(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Return true if the specified instruction is a G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are ~0 or undef.
+bool isBuildVectorAllOnes(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Returns true if given the TargetLowering's boolean contents information,
+/// the value \p Val contains a true value.
+bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
+ bool IsFP);
+
+/// Returns an integer representing true, as defined by the
+/// TargetBooleanContents.
+int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
} // End namespace llvm.
#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/ISDOpcodes.h b/linux-x64/clang/include/llvm/CodeGen/ISDOpcodes.h
index acf27dc..5358b15 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ISDOpcodes.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ISDOpcodes.h
@@ -13,6 +13,8 @@
#ifndef LLVM_CODEGEN_ISDOPCODES_H
#define LLVM_CODEGEN_ISDOPCODES_H
+#include "llvm/CodeGen/ValueTypes.h"
+
namespace llvm {
/// ISD namespace - This namespace contains an enum which represents all of the
@@ -20,1055 +22,1358 @@
///
namespace ISD {
- //===--------------------------------------------------------------------===//
- /// ISD::NodeType enum - This enum defines the target-independent operators
- /// for a SelectionDAG.
+//===--------------------------------------------------------------------===//
+/// ISD::NodeType enum - This enum defines the target-independent operators
+/// for a SelectionDAG.
+///
+/// Targets may also define target-dependent operator codes for SDNodes. For
+/// example, on x86, these are the enum values in the X86ISD namespace.
+/// Targets should aim to use target-independent operators to model their
+/// instruction sets as much as possible, and only use target-dependent
+/// operators when they have special requirements.
+///
+/// Finally, during and after selection proper, SDNodes may use special
+/// operator codes that correspond directly with MachineInstr opcodes. These
+/// are used to represent selected instructions. See the isMachineOpcode()
+/// and getMachineOpcode() member functions of SDNode.
+///
+enum NodeType {
+
+ /// DELETED_NODE - This is an illegal value that is used to catch
+ /// errors. This opcode is not a legal opcode for any node.
+ DELETED_NODE,
+
+ /// EntryToken - This is the marker used to indicate the start of a region.
+ EntryToken,
+
+ /// TokenFactor - This node takes multiple tokens as input and produces a
+ /// single token result. This is used to represent the fact that the operand
+ /// operators are independent of each other.
+ TokenFactor,
+
+ /// AssertSext, AssertZext - These nodes record if a register contains a
+ /// value that has already been zero or sign extended from a narrower type.
+ /// These nodes take two operands. The first is the node that has already
+ /// been extended, and the second is a value type node indicating the width
+ /// of the extension
+ AssertSext,
+ AssertZext,
+ AssertAlign,
+
+ /// Various leaf nodes.
+ BasicBlock,
+ VALUETYPE,
+ CONDCODE,
+ Register,
+ RegisterMask,
+ Constant,
+ ConstantFP,
+ GlobalAddress,
+ GlobalTLSAddress,
+ FrameIndex,
+ JumpTable,
+ ConstantPool,
+ ExternalSymbol,
+ BlockAddress,
+
+ /// The address of the GOT
+ GLOBAL_OFFSET_TABLE,
+
+ /// FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and
+ /// llvm.returnaddress on the DAG. These nodes take one operand, the index
+ /// of the frame or return address to return. An index of zero corresponds
+ /// to the current function's frame or return address, an index of one to
+ /// the parent's frame or return address, and so on.
+ FRAMEADDR,
+ RETURNADDR,
+
+ /// ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
+ /// This node takes no operand, returns a target-specific pointer to the
+ /// place in the stack frame where the return address of the current
+ /// function is stored.
+ ADDROFRETURNADDR,
+
+ /// SPONENTRY - Represents the llvm.sponentry intrinsic. Takes no argument
+ /// and returns the stack pointer value at the entry of the current
+ /// function calling this intrinsic.
+ SPONENTRY,
+
+ /// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
+ /// Materializes the offset from the local object pointer of another
+ /// function to a particular local object passed to llvm.localescape. The
+ /// operand is the MCSymbol label used to represent this offset, since
+ /// typically the offset is not known until after code generation of the
+ /// parent.
+ LOCAL_RECOVER,
+
+ /// READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on
+ /// the DAG, which implements the named register global variables extension.
+ READ_REGISTER,
+ WRITE_REGISTER,
+
+ /// FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to
+ /// first (possible) on-stack argument. This is needed for correct stack
+ /// adjustment during unwind.
+ FRAME_TO_ARGS_OFFSET,
+
+ /// EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical
+ /// Frame Address (CFA), generally the value of the stack pointer at the
+ /// call site in the previous frame.
+ EH_DWARF_CFA,
+
+ /// OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents
+ /// 'eh_return' gcc dwarf builtin, which is used to return from
+ /// exception. The general meaning is: adjust stack by OFFSET and pass
+ /// execution to HANDLER. Many platform-related details also :)
+ EH_RETURN,
+
+ /// RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer)
+ /// This corresponds to the eh.sjlj.setjmp intrinsic.
+ /// It takes an input chain and a pointer to the jump buffer as inputs
+ /// and returns an outchain.
+ EH_SJLJ_SETJMP,
+
+ /// OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer)
+ /// This corresponds to the eh.sjlj.longjmp intrinsic.
+ /// It takes an input chain and a pointer to the jump buffer as inputs
+ /// and returns an outchain.
+ EH_SJLJ_LONGJMP,
+
+ /// OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN)
+ /// The target initializes the dispatch table here.
+ EH_SJLJ_SETUP_DISPATCH,
+
+ /// TargetConstant* - Like Constant*, but the DAG does not do any folding,
+ /// simplification, or lowering of the constant. They are used for constants
+ /// which are known to fit in the immediate fields of their users, or for
+ /// carrying magic numbers which are not values which need to be
+ /// materialized in registers.
+ TargetConstant,
+ TargetConstantFP,
+
+ /// TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or
+ /// anything else with this node, and this is valid in the target-specific
+ /// dag, turning into a GlobalAddress operand.
+ TargetGlobalAddress,
+ TargetGlobalTLSAddress,
+ TargetFrameIndex,
+ TargetJumpTable,
+ TargetConstantPool,
+ TargetExternalSymbol,
+ TargetBlockAddress,
+
+ MCSymbol,
+
+ /// TargetIndex - Like a constant pool entry, but with completely
+ /// target-dependent semantics. Holds target flags, a 32-bit index, and a
+ /// 64-bit index. Targets can use this however they like.
+ TargetIndex,
+
+ /// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...)
+ /// This node represents a target intrinsic function with no side effects.
+ /// The first operand is the ID number of the intrinsic from the
+ /// llvm::Intrinsic namespace. The operands to the intrinsic follow. The
+ /// node returns the result of the intrinsic.
+ INTRINSIC_WO_CHAIN,
+
+ /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...)
+ /// This node represents a target intrinsic function with side effects that
+ /// returns a result. The first operand is a chain pointer. The second is
+ /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The
+ /// operands to the intrinsic follow. The node has two results, the result
+ /// of the intrinsic and an output chain.
+ INTRINSIC_W_CHAIN,
+
+ /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...)
+ /// This node represents a target intrinsic function with side effects that
+ /// does not return a result. The first operand is a chain pointer. The
+ /// second is the ID number of the intrinsic from the llvm::Intrinsic
+ /// namespace. The operands to the intrinsic follow.
+ INTRINSIC_VOID,
+
+ /// CopyToReg - This node has three operands: a chain, a register number to
+ /// set to this value, and a value.
+ CopyToReg,
+
+ /// CopyFromReg - This node indicates that the input value is a virtual or
+ /// physical register that is defined outside of the scope of this
+ /// SelectionDAG. The register is available from the RegisterSDNode object.
+ CopyFromReg,
+
+ /// UNDEF - An undefined node.
+ UNDEF,
+
+ // FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or
+ // is evaluated to UNDEF), or returns VAL otherwise. Note that each
+ // read of UNDEF can yield a different value, but FREEZE(UNDEF) cannot.
+ FREEZE,
+
+ /// EXTRACT_ELEMENT - This is used to get the lower or upper (determined by
+ /// a Constant, which is required to be operand #1) half of the integer or
+ /// float value specified as operand #0. This is only for use before
+ /// legalization, for values that will be broken into multiple registers.
+ EXTRACT_ELEMENT,
+
+ /// BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
+ /// Given two values of the same integer value type, this produces a value
+ /// twice as big. Like EXTRACT_ELEMENT, this can only be used before
+ /// legalization. The lower part of the composite value should be in
+ /// element 0 and the upper part should be in element 1.
+ BUILD_PAIR,
+
+ /// MERGE_VALUES - This node takes multiple discrete operands and returns
+ /// them all as its individual results. This node has exactly the same
+ /// number of inputs and outputs. This node is useful for some pieces of the
+ /// code generator that want to think about a single node with multiple
+ /// results, not multiple nodes.
+ MERGE_VALUES,
+
+ /// Simple integer binary arithmetic operators.
+ ADD,
+ SUB,
+ MUL,
+ SDIV,
+ UDIV,
+ SREM,
+ UREM,
+
+ /// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing
+ /// a signed/unsigned value of type i[2*N], and return the full value as
+ /// two results, each of type iN.
+ SMUL_LOHI,
+ UMUL_LOHI,
+
+ /// SDIVREM/UDIVREM - Divide two integers and produce both a quotient and
+ /// remainder result.
+ SDIVREM,
+ UDIVREM,
+
+ /// CARRY_FALSE - This node is used when folding other nodes,
+ /// like ADDC/SUBC, which indicate the carry result is always false.
+ CARRY_FALSE,
+
+ /// Carry-setting nodes for multiple precision addition and subtraction.
+ /// These nodes take two operands of the same value type, and produce two
+ /// results. The first result is the normal add or sub result, the second
+ /// result is the carry flag result.
+ /// FIXME: These nodes are deprecated in favor of ADDCARRY and SUBCARRY.
+ /// They are kept around for now to provide a smooth transition path
+ /// toward the use of ADDCARRY/SUBCARRY and will eventually be removed.
+ ADDC,
+ SUBC,
+
+ /// Carry-using nodes for multiple precision addition and subtraction. These
+ /// nodes take three operands: The first two are the normal lhs and rhs to
+ /// the add or sub, and the third is the input carry flag. These nodes
+ /// produce two results; the normal result of the add or sub, and the output
+ /// carry flag. These nodes both read and write a carry flag to allow them
+ /// to them to be chained together for add and sub of arbitrarily large
+ /// values.
+ ADDE,
+ SUBE,
+
+ /// Carry-using nodes for multiple precision addition and subtraction.
+ /// These nodes take three operands: The first two are the normal lhs and
+ /// rhs to the add or sub, and the third is a boolean indicating if there
+ /// is an incoming carry. These nodes produce two results: the normal
+ /// result of the add or sub, and the output carry so they can be chained
+ /// together. The use of this opcode is preferable to adde/sube if the
+ /// target supports it, as the carry is a regular value rather than a
+ /// glue, which allows further optimisation.
+ ADDCARRY,
+ SUBCARRY,
+
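On the SelectionDAG side, a sketch of the preferred carry chaining (DAG, DL and the 64-bit operand values are assumed to be in scope):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: 128-bit add built from UADDO on the low halves and ADDCARRY on
// the high halves, passing the carry along as a regular i1 value.
static SDValue lowerAdd128(SelectionDAG &DAG, const SDLoc &DL, SDValue ALo,
                           SDValue AHi, SDValue BLo, SDValue BHi) {
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1);
  SDValue Lo = DAG.getNode(ISD::UADDO, DL, VTs, ALo, BLo);
  return DAG.getNode(ISD::ADDCARRY, DL, VTs, AHi, BHi, Lo.getValue(1));
}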
+ /// Carry-using overflow-aware nodes for multiple precision addition and
+ /// subtraction. These nodes take three operands: The first two are normal lhs
+ /// and rhs to the add or sub, and the third is a boolean indicating if there
+ /// is an incoming carry. They produce two results: the normal result of the
+ /// add or sub, and a boolean that indicates if an overflow occurred (*not*
+ /// flag, because it may be a store to memory, etc.). If the type of the
+ /// boolean is not i1 then the high bits conform to getBooleanContents.
+ SADDO_CARRY,
+ SSUBO_CARRY,
+
+ /// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
+ /// These nodes take two operands: the normal LHS and RHS to the add. They
+ /// produce two results: the normal result of the add, and a boolean that
+ /// indicates if an overflow occurred (*not* a flag, because it may be a
+ /// store to memory, etc.). If the type of the boolean is not i1 then the high
+ /// bits conform to getBooleanContents.
+ /// These nodes are generated from llvm.[su]add.with.overflow intrinsics.
+ SADDO,
+ UADDO,
+
+ /// Same for subtraction.
+ SSUBO,
+ USUBO,
+
+ /// Same for multiplication.
+ SMULO,
+ UMULO,
+
+ /// RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2
+ /// integers with the same bit width (W). If the true value of LHS + RHS
+ /// exceeds the largest value that can be represented by W bits, the
+ /// resulting value is this maximum value. Otherwise, if this value is less
+ /// than the smallest value that can be represented by W bits, the
+ /// resulting value is this minimum value.
+ SADDSAT,
+ UADDSAT,
+
+ /// RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2
+ /// integers with the same bit width (W). If the true value of LHS - RHS
+ /// exceeds the largest value that can be represented by W bits, the
+ /// resulting value is this maximum value. Otherwise, if this value is less
+ /// than the smallest value that can be represented by W bits, the
+ /// resulting value is this minimum value.
+ SSUBSAT,
+ USUBSAT,
+
+ /// RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift. The first
+ /// operand is the value to be shifted, and the second argument is the amount
+ /// to shift by. Both must be integers of the same bit width (W). If the true
+ /// value of LHS << RHS exceeds the largest value that can be represented by
+ /// W bits, the resulting value is this maximum value. Otherwise, if this
+ /// value is less than the smallest value that can be represented by W bits,
+ /// the resulting value is this minimum value.
+ SSHLSAT,
+ USHLSAT,
+
+ /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication
+ /// on 2 integers with the same width and scale. SCALE represents the scale
+ /// of both operands as fixed point numbers. This SCALE parameter must be a
+ /// constant integer. A scale of zero is effectively performing
+ /// multiplication on 2 integers.
+ SMULFIX,
+ UMULFIX,
+
+ /// Same as the corresponding unsaturated fixed point instructions, but the
+ /// result is clamped between the min and max values representable by the
+ /// bits of the first 2 operands.
+ SMULFIXSAT,
+ UMULFIXSAT,
+
+ /// RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on
+ /// 2 integers with the same width and scale. SCALE represents the scale
+ /// of both operands as fixed point numbers. This SCALE parameter must be a
+ /// constant integer.
+ SDIVFIX,
+ UDIVFIX,
+
+ /// Same as the corresponding unsaturated fixed point instructions, but the
+ /// result is clamped between the min and max values representable by the
+ /// bits of the first 2 operands.
+ SDIVFIXSAT,
+ UDIVFIXSAT,
+
+ /// Simple binary floating point operators.
+ FADD,
+ FSUB,
+ FMUL,
+ FDIV,
+ FREM,
+
+ /// Constrained versions of the binary floating point operators.
+ /// These will be lowered to the simple operators before final selection.
+ /// They are used to limit optimizations while the DAG is being
+ /// optimized.
+ STRICT_FADD,
+ STRICT_FSUB,
+ STRICT_FMUL,
+ STRICT_FDIV,
+ STRICT_FREM,
+ STRICT_FMA,
+
+ /// Constrained versions of libm-equivalent floating point intrinsics.
+ /// These will be lowered to the equivalent non-constrained pseudo-op
+ /// (or expanded to the equivalent library call) before final selection.
+ /// They are used to limit optimizations while the DAG is being optimized.
+ STRICT_FSQRT,
+ STRICT_FPOW,
+ STRICT_FPOWI,
+ STRICT_FSIN,
+ STRICT_FCOS,
+ STRICT_FEXP,
+ STRICT_FEXP2,
+ STRICT_FLOG,
+ STRICT_FLOG10,
+ STRICT_FLOG2,
+ STRICT_FRINT,
+ STRICT_FNEARBYINT,
+ STRICT_FMAXNUM,
+ STRICT_FMINNUM,
+ STRICT_FCEIL,
+ STRICT_FFLOOR,
+ STRICT_FROUND,
+ STRICT_FROUNDEVEN,
+ STRICT_FTRUNC,
+ STRICT_LROUND,
+ STRICT_LLROUND,
+ STRICT_LRINT,
+ STRICT_LLRINT,
+ STRICT_FMAXIMUM,
+ STRICT_FMINIMUM,
+
+ /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or
+ /// unsigned integer. These have the same semantics as fptosi and fptoui
+ /// in IR.
+ /// They are used to limit optimizations while the DAG is being optimized.
+ STRICT_FP_TO_SINT,
+ STRICT_FP_TO_UINT,
+
+ /// STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to
+ /// a floating point value. These have the same semantics as sitofp and
+ /// uitofp in IR.
+ /// They are used to limit optimizations while the DAG is being optimized.
+ STRICT_SINT_TO_FP,
+ STRICT_UINT_TO_FP,
+
+ /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
+ /// point type down to the precision of the destination VT. TRUNC is a
+ /// flag, which is always an integer that is zero or one. If TRUNC is 0,
+ /// this is a normal rounding, if it is 1, this FP_ROUND is known to not
+ /// change the value of Y.
///
- /// Targets may also define target-dependent operator codes for SDNodes. For
- /// example, on x86, these are the enum values in the X86ISD namespace.
- /// Targets should aim to use target-independent operators to model their
- /// instruction sets as much as possible, and only use target-dependent
- /// operators when they have special requirements.
+ /// The TRUNC = 1 case is used in cases where we know that the value will
+ /// not be modified by the node, because Y is not using any of the extra
+ /// precision of source type. This allows certain transformations like
+ /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,1)) -> X which are not safe for
+ /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,0)) because the extra bits aren't
+ /// removed.
+ /// It is used to limit optimizations while the DAG is being optimized.
+ STRICT_FP_ROUND,
+
+ /// X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP
+ /// type.
+ /// It is used to limit optimizations while the DAG is being optimized.
+ STRICT_FP_EXTEND,
+
+ /// STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used
+ /// for floating-point operands only. STRICT_FSETCC performs a quiet
+ /// comparison operation, while STRICT_FSETCCS performs a signaling
+ /// comparison operation.
+ STRICT_FSETCC,
+ STRICT_FSETCCS,
+
+ /// FMA - Perform a * b + c with no intermediate rounding step.
+ FMA,
+
+ /// FMAD - Perform a * b + c, while getting the same result as the
+ /// separately rounded operations.
+ FMAD,
+
+ /// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
+ /// DAG node does not require that X and Y have the same type, just that
+ /// they are both floating point. X and the result must have the same type.
+ /// FCOPYSIGN(f32, f64) is allowed.
+ FCOPYSIGN,
+
+ /// INT = FGETSIGN(FP) - Return the sign bit of the specified floating point
+ /// value as an integer 0/1 value.
+ FGETSIGN,
+
+ /// Returns platform specific canonical encoding of a floating point number.
+ FCANONICALIZE,
+
+ /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector
+ /// with the specified, possibly variable, elements. The types of the
+ /// operands must match the vector element type, except that integer types
+ /// are allowed to be larger than the element type, in which case the
+ /// operands are implicitly truncated. The types of the operands must all
+ /// be the same.
+ BUILD_VECTOR,
+
+ /// INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element
+ /// at IDX replaced with VAL. If the type of VAL is larger than the vector
+ /// element type then VAL is truncated before replacement.
///
- /// Finally, during and after selection proper, SNodes may use special
- /// operator codes that correspond directly with MachineInstr opcodes. These
- /// are used to represent selected instructions. See the isMachineOpcode()
- /// and getMachineOpcode() member functions of SDNode.
+ /// If VECTOR is a scalable vector, then IDX may be larger than the minimum
+ /// vector width. IDX is not first scaled by the runtime scaling factor of
+ /// VECTOR.
+ INSERT_VECTOR_ELT,
+
+ /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR
+ /// identified by the (potentially variable) element number IDX. If the return
+ /// type is an integer type larger than the element type of the vector, the
+ /// result is extended to the width of the return type. In that case, the high
+ /// bits are undefined.
///
- enum NodeType {
- /// DELETED_NODE - This is an illegal value that is used to catch
- /// errors. This opcode is not a legal opcode for any node.
- DELETED_NODE,
+ /// If VECTOR is a scalable vector, then IDX may be larger than the minimum
+ /// vector width. IDX is not first scaled by the runtime scaling factor of
+ /// VECTOR.
+ EXTRACT_VECTOR_ELT,
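
A fixed-width scalar emulation of both element operations (the helper names are hypothetical, and the scalable-vector indexing rules above are not modelled):

    #include <array>
    #include <cassert>
    #include <cstdint>

    // INSERT_VECTOR_ELT: VAL (i32 here) is truncated to the i8 element type.
    template <std::size_t N>
    void insertElt(std::array<uint8_t, N> &v, int32_t val, std::size_t idx) {
      v[idx] = static_cast<uint8_t>(val);
    }

    // EXTRACT_VECTOR_ELT with a wider integer result: the element lands in the
    // low bits; the node leaves the high bits undefined (zeroed here for the demo).
    template <std::size_t N>
    int32_t extractElt(const std::array<uint8_t, N> &v, std::size_t idx) {
      return static_cast<int32_t>(v[idx]);
    }

    int main() {
      std::array<uint8_t, 4> v = {0, 0, 0, 0};
      insertElt(v, 0x1FF, 2); // truncated to 0xFF
      assert(extractElt(v, 2) == 0xFF);
      return 0;
    }
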
- /// EntryToken - This is the marker used to indicate the start of a region.
- EntryToken,
+ /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of
+ /// vector type with the same length and element type, this produces a
+ /// concatenated vector result value, with length equal to the sum of the
+ /// lengths of the input vectors. If VECTOR0 is a fixed-width vector, then
+ /// VECTOR1..VECTORN must all be fixed-width vectors. Similarly, if VECTOR0
+ /// is a scalable vector, then VECTOR1..VECTORN must all be scalable vectors.
+ CONCAT_VECTORS,
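
A trivial fixed-width sketch of the length rule (the result length is the sum of the input lengths):

    #include <algorithm>
    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 2> a = {1, 2}, b = {3, 4}; // two v2i32 inputs
      std::array<int, 4> c;                      // v4i32 result
      std::copy(a.begin(), a.end(), c.begin());
      std::copy(b.begin(), b.end(), c.begin() + a.size());
      assert(c[0] == 1 && c[3] == 4);
      return 0;
    }
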
- /// TokenFactor - This node takes multiple tokens as input and produces a
- /// single token result. This is used to represent the fact that the operand
- /// operators are independent of each other.
- TokenFactor,
-
- /// AssertSext, AssertZext - These nodes record if a register contains a
- /// value that has already been zero or sign extended from a narrower type.
- /// These nodes take two operands. The first is the node that has already
- /// been extended, and the second is a value type node indicating the width
- /// of the extension
- AssertSext, AssertZext,
-
- /// Various leaf nodes.
- BasicBlock, VALUETYPE, CONDCODE, Register, RegisterMask,
- Constant, ConstantFP,
- GlobalAddress, GlobalTLSAddress, FrameIndex,
- JumpTable, ConstantPool, ExternalSymbol, BlockAddress,
-
- /// The address of the GOT
- GLOBAL_OFFSET_TABLE,
-
- /// FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and
- /// llvm.returnaddress on the DAG. These nodes take one operand, the index
- /// of the frame or return address to return. An index of zero corresponds
- /// to the current function's frame or return address, an index of one to
- /// the parent's frame or return address, and so on.
- FRAMEADDR, RETURNADDR, ADDROFRETURNADDR, SPONENTRY,
-
- /// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
- /// Materializes the offset from the local object pointer of another
- /// function to a particular local object passed to llvm.localescape. The
- /// operand is the MCSymbol label used to represent this offset, since
- /// typically the offset is not known until after code generation of the
- /// parent.
- LOCAL_RECOVER,
-
- /// READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on
- /// the DAG, which implements the named register global variables extension.
- READ_REGISTER,
- WRITE_REGISTER,
-
- /// FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to
- /// first (possible) on-stack argument. This is needed for correct stack
- /// adjustment during unwind.
- FRAME_TO_ARGS_OFFSET,
-
- /// EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical
- /// Frame Address (CFA), generally the value of the stack pointer at the
- /// call site in the previous frame.
- EH_DWARF_CFA,
-
- /// OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents
- /// 'eh_return' gcc dwarf builtin, which is used to return from
- /// exception. The general meaning is: adjust stack by OFFSET and pass
- /// execution to HANDLER. Many platform-related details also :)
- EH_RETURN,
-
- /// RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer)
- /// This corresponds to the eh.sjlj.setjmp intrinsic.
- /// It takes an input chain and a pointer to the jump buffer as inputs
- /// and returns an outchain.
- EH_SJLJ_SETJMP,
-
- /// OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer)
- /// This corresponds to the eh.sjlj.longjmp intrinsic.
- /// It takes an input chain and a pointer to the jump buffer as inputs
- /// and returns an outchain.
- EH_SJLJ_LONGJMP,
-
- /// OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN)
- /// The target initializes the dispatch table here.
- EH_SJLJ_SETUP_DISPATCH,
-
- /// TargetConstant* - Like Constant*, but the DAG does not do any folding,
- /// simplification, or lowering of the constant. They are used for constants
- /// which are known to fit in the immediate fields of their users, or for
- /// carrying magic numbers which are not values which need to be
- /// materialized in registers.
- TargetConstant,
- TargetConstantFP,
-
- /// TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or
- /// anything else with this node, and this is valid in the target-specific
- /// dag, turning into a GlobalAddress operand.
- TargetGlobalAddress,
- TargetGlobalTLSAddress,
- TargetFrameIndex,
- TargetJumpTable,
- TargetConstantPool,
- TargetExternalSymbol,
- TargetBlockAddress,
-
- MCSymbol,
-
- /// TargetIndex - Like a constant pool entry, but with completely
- /// target-dependent semantics. Holds target flags, a 32-bit index, and a
- /// 64-bit index. Targets can use this however they like.
- TargetIndex,
-
- /// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...)
- /// This node represents a target intrinsic function with no side effects.
- /// The first operand is the ID number of the intrinsic from the
- /// llvm::Intrinsic namespace. The operands to the intrinsic follow. The
- /// node returns the result of the intrinsic.
- INTRINSIC_WO_CHAIN,
-
- /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...)
- /// This node represents a target intrinsic function with side effects that
- /// returns a result. The first operand is a chain pointer. The second is
- /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The
- /// operands to the intrinsic follow. The node has two results, the result
- /// of the intrinsic and an output chain.
- INTRINSIC_W_CHAIN,
-
- /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...)
- /// This node represents a target intrinsic function with side effects that
- /// does not return a result. The first operand is a chain pointer. The
- /// second is the ID number of the intrinsic from the llvm::Intrinsic
- /// namespace. The operands to the intrinsic follow.
- INTRINSIC_VOID,
-
- /// CopyToReg - This node has three operands: a chain, a register number to
- /// set to this value, and a value.
- CopyToReg,
-
- /// CopyFromReg - This node indicates that the input value is a virtual or
- /// physical register that is defined outside of the scope of this
- /// SelectionDAG. The register is available from the RegisterSDNode object.
- CopyFromReg,
-
- /// UNDEF - An undefined node.
- UNDEF,
-
- /// EXTRACT_ELEMENT - This is used to get the lower or upper (determined by
- /// a Constant, which is required to be operand #1) half of the integer or
- /// float value specified as operand #0. This is only for use before
- /// legalization, for values that will be broken into multiple registers.
- EXTRACT_ELEMENT,
-
- /// BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
- /// Given two values of the same integer value type, this produces a value
- /// twice as big. Like EXTRACT_ELEMENT, this can only be used before
- /// legalization. The lower part of the composite value should be in
- /// element 0 and the upper part should be in element 1.
- BUILD_PAIR,
-
- /// MERGE_VALUES - This node takes multiple discrete operands and returns
- /// them all as its individual results. This nodes has exactly the same
- /// number of inputs and outputs. This node is useful for some pieces of the
- /// code generator that want to think about a single node with multiple
- /// results, not multiple nodes.
- MERGE_VALUES,
-
- /// Simple integer binary arithmetic operators.
- ADD, SUB, MUL, SDIV, UDIV, SREM, UREM,
-
- /// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing
- /// a signed/unsigned value of type i[2*N], and return the full value as
- /// two results, each of type iN.
- SMUL_LOHI, UMUL_LOHI,
-
- /// SDIVREM/UDIVREM - Divide two integers and produce both a quotient and
- /// remainder result.
- SDIVREM, UDIVREM,
-
- /// CARRY_FALSE - This node is used when folding other nodes,
- /// like ADDC/SUBC, which indicate the carry result is always false.
- CARRY_FALSE,
-
- /// Carry-setting nodes for multiple precision addition and subtraction.
- /// These nodes take two operands of the same value type, and produce two
- /// results. The first result is the normal add or sub result, the second
- /// result is the carry flag result.
- /// FIXME: These nodes are deprecated in favor of ADDCARRY and SUBCARRY.
- /// They are kept around for now to provide a smooth transition path
- /// toward the use of ADDCARRY/SUBCARRY and will eventually be removed.
- ADDC, SUBC,
-
- /// Carry-using nodes for multiple precision addition and subtraction. These
- /// nodes take three operands: The first two are the normal lhs and rhs to
- /// the add or sub, and the third is the input carry flag. These nodes
- /// produce two results; the normal result of the add or sub, and the output
- /// carry flag. These nodes both read and write a carry flag to allow them
- /// to them to be chained together for add and sub of arbitrarily large
- /// values.
- ADDE, SUBE,
-
- /// Carry-using nodes for multiple precision addition and subtraction.
- /// These nodes take three operands: The first two are the normal lhs and
- /// rhs to the add or sub, and the third is a boolean indicating if there
- /// is an incoming carry. These nodes produce two results: the normal
- /// result of the add or sub, and the output carry so they can be chained
- /// together. The use of this opcode is preferable to adde/sube if the
- /// target supports it, as the carry is a regular value rather than a
- /// glue, which allows further optimisation.
- ADDCARRY, SUBCARRY,
-
- /// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
- /// These nodes take two operands: the normal LHS and RHS to the add. They
- /// produce two results: the normal result of the add, and a boolean that
- /// indicates if an overflow occurred (*not* a flag, because it may be store
- /// to memory, etc.). If the type of the boolean is not i1 then the high
- /// bits conform to getBooleanContents.
- /// These nodes are generated from llvm.[su]add.with.overflow intrinsics.
- SADDO, UADDO,
-
- /// Same for subtraction.
- SSUBO, USUBO,
-
- /// Same for multiplication.
- SMULO, UMULO,
-
- /// RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2
- /// integers with the same bit width (W). If the true value of LHS + RHS
- /// exceeds the largest value that can be represented by W bits, the
- /// resulting value is this maximum value. Otherwise, if this value is less
- /// than the smallest value that can be represented by W bits, the
- /// resulting value is this minimum value.
- SADDSAT, UADDSAT,
-
- /// RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2
- /// integers with the same bit width (W). If the true value of LHS - RHS
- /// exceeds the largest value that can be represented by W bits, the
- /// resulting value is this maximum value. Otherwise, if this value is less
- /// than the smallest value that can be represented by W bits, the
- /// resulting value is this minimum value.
- SSUBSAT, USUBSAT,
-
- /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
- /// 2 integers with the same width and scale. SCALE represents the scale of
- /// both operands as fixed point numbers. This SCALE parameter must be a
- /// constant integer. A scale of zero is effectively performing
- /// multiplication on 2 integers.
- SMULFIX, UMULFIX,
-
- /// Same as the corresponding unsaturated fixed point instructions, but the
- /// result is clamped between the min and max values representable by the
- /// bits of the first 2 operands.
- SMULFIXSAT,
-
- /// Simple binary floating point operators.
- FADD, FSUB, FMUL, FDIV, FREM,
-
- /// Constrained versions of the binary floating point operators.
- /// These will be lowered to the simple operators before final selection.
- /// They are used to limit optimizations while the DAG is being
- /// optimized.
- STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
- STRICT_FMA,
-
- /// Constrained versions of libm-equivalent floating point intrinsics.
- /// These will be lowered to the equivalent non-constrained pseudo-op
- /// (or expanded to the equivalent library call) before final selection.
- /// They are used to limit optimizations while the DAG is being optimized.
- STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
- STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
- STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
- STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
-
- /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
- /// point type down to the precision of the destination VT. TRUNC is a
- /// flag, which is always an integer that is zero or one. If TRUNC is 0,
- /// this is a normal rounding, if it is 1, this FP_ROUND is known to not
- /// change the value of Y.
- ///
- /// The TRUNC = 1 case is used in cases where we know that the value will
- /// not be modified by the node, because Y is not using any of the extra
- /// precision of source type. This allows certain transformations like
- /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,1)) -> X which are not safe for
- /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,0)) because the extra bits aren't
- /// removed.
- /// It is used to limit optimizations while the DAG is being optimized.
- STRICT_FP_ROUND,
-
- /// X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP
- /// type.
- /// It is used to limit optimizations while the DAG is being optimized.
- STRICT_FP_EXTEND,
-
- /// FMA - Perform a * b + c with no intermediate rounding step.
- FMA,
-
- /// FMAD - Perform a * b + c, while getting the same result as the
- /// separately rounded operations.
- FMAD,
-
- /// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
- /// DAG node does not require that X and Y have the same type, just that
- /// they are both floating point. X and the result must have the same type.
- /// FCOPYSIGN(f32, f64) is allowed.
- FCOPYSIGN,
-
- /// INT = FGETSIGN(FP) - Return the sign bit of the specified floating point
- /// value as an integer 0/1 value.
- FGETSIGN,
-
- /// Returns platform specific canonical encoding of a floating point number.
- FCANONICALIZE,
-
- /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the
- /// specified, possibly variable, elements. The number of elements is
- /// required to be a power of two. The types of the operands must all be
- /// the same and must match the vector element type, except that integer
- /// types are allowed to be larger than the element type, in which case
- /// the operands are implicitly truncated.
- BUILD_VECTOR,
-
- /// INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element
- /// at IDX replaced with VAL. If the type of VAL is larger than the vector
- /// element type then VAL is truncated before replacement.
- INSERT_VECTOR_ELT,
-
- /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR
- /// identified by the (potentially variable) element number IDX. If the
- /// return type is an integer type larger than the element type of the
- /// vector, the result is extended to the width of the return type. In
- /// that case, the high bits are undefined.
- EXTRACT_VECTOR_ELT,
-
- /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of
- /// vector type with the same length and element type, this produces a
- /// concatenated vector result value, with length equal to the sum of the
- /// lengths of the input vectors.
- CONCAT_VECTORS,
-
- /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector
- /// with VECTOR2 inserted into VECTOR1 at the (potentially
- /// variable) element number IDX, which must be a multiple of the
- /// VECTOR2 vector length. The elements of VECTOR1 starting at
- /// IDX are overwritten with VECTOR2. Elements IDX through
- /// vector_length(VECTOR2) must be valid VECTOR1 indices.
- INSERT_SUBVECTOR,
-
- /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an
- /// vector value) starting with the element number IDX, which must be a
- /// constant multiple of the result vector length.
- EXTRACT_SUBVECTOR,
-
- /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
- /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
- /// values that indicate which value (or undef) each result element will
- /// get. These constant ints are accessible through the
- /// ShuffleVectorSDNode class. This is quite similar to the Altivec
- /// 'vperm' instruction, except that the indices must be constants and are
- /// in terms of the element size of VEC1/VEC2, not in terms of bytes.
- VECTOR_SHUFFLE,
-
- /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
- /// scalar value into element 0 of the resultant vector type. The top
- /// elements 1 to N-1 of the N-element vector are undefined. The type
- /// of the operand must match the vector element type, except when they
- /// are integer types. In this case the operand is allowed to be wider
- /// than the vector element type, and is implicitly truncated to it.
- SCALAR_TO_VECTOR,
-
- /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
- /// producing an unsigned/signed value of type i[2*N], then return the top
- /// part.
- MULHU, MULHS,
-
- /// [US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned
- /// integers.
- SMIN, SMAX, UMIN, UMAX,
-
- /// Bitwise operators - logical and, logical or, logical xor.
- AND, OR, XOR,
-
- /// ABS - Determine the unsigned absolute value of a signed integer value of
- /// the same bitwidth.
- /// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
- /// is performed.
- ABS,
-
- /// Shift and rotation operations. After legalization, the type of the
- /// shift amount is known to be TLI.getShiftAmountTy(). Before legalization
- /// the shift amount can be any type, but care must be taken to ensure it is
- /// large enough. TLI.getShiftAmountTy() is i8 on some targets, but before
- /// legalization, types like i1024 can occur and i8 doesn't have enough bits
- /// to represent the shift amount.
- /// When the 1st operand is a vector, the shift amount must be in the same
- /// type. (TLI.getShiftAmountTy() will return the same type when the input
- /// type is a vector.)
- /// For rotates and funnel shifts, the shift amount is treated as an unsigned
- /// amount modulo the element size of the first operand.
- ///
- /// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
- /// fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
- /// fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
- SHL, SRA, SRL, ROTL, ROTR, FSHL, FSHR,
-
- /// Byte Swap and Counting operators.
- BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
-
- /// Bit counting operators with an undefined result for zero inputs.
- CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
-
- /// Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not
- /// i1 then the high bits must conform to getBooleanContents.
- SELECT,
-
- /// Select with a vector condition (op #0) and two vector operands (ops #1
- /// and #2), returning a vector result. All vectors have the same length.
- /// Much like the scalar select and setcc, each bit in the condition selects
- /// whether the corresponding result element is taken from op #1 or op #2.
- /// At first, the VSELECT condition is of vXi1 type. Later, targets may
- /// change the condition type in order to match the VSELECT node using a
- /// pattern. The condition follows the BooleanContent format of the target.
- VSELECT,
-
- /// Select with condition operator - This selects between a true value and
- /// a false value (ops #2 and #3) based on the boolean result of comparing
- /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
- /// condition code in op #4, a CondCodeSDNode.
- SELECT_CC,
-
- /// SetCC operator - This evaluates to a true value iff the condition is
- /// true. If the result value type is not i1 then the high bits conform
- /// to getBooleanContents. The operands to this are the left and right
- /// operands to compare (ops #0, and #1) and the condition code to compare
- /// them with (op #2) as a CondCodeSDNode. If the operands are vector types
- /// then the result type must also be a vector type.
- SETCC,
-
- /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but
- /// op #2 is a boolean indicating if there is an incoming carry. This
- /// operator checks the result of "LHS - RHS - Carry", and can be used to
- /// compare two wide integers:
- /// (setcccarry lhshi rhshi (subcarry lhslo rhslo) cc).
- /// Only valid for integers.
- SETCCCARRY,
-
- /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded
- /// integer shift operations. The operation ordering is:
- /// [Lo,Hi] = op [LoLHS,HiLHS], Amt
- SHL_PARTS, SRA_PARTS, SRL_PARTS,
-
- /// Conversion operators. These are all single input single output
- /// operations. For all of these, the result type must be strictly
- /// wider or narrower (depending on the operation) than the source
- /// type.
-
- /// SIGN_EXTEND - Used for integer types, replicating the sign bit
- /// into new bits.
- SIGN_EXTEND,
-
- /// ZERO_EXTEND - Used for integer types, zeroing the new bits.
- ZERO_EXTEND,
-
- /// ANY_EXTEND - Used for integer types. The high bits are undefined.
- ANY_EXTEND,
-
- /// TRUNCATE - Completely drop the high bits.
- TRUNCATE,
-
- /// [SU]INT_TO_FP - These operators convert integers (whose interpreted sign
- /// depends on the first letter) to floating point.
- SINT_TO_FP,
- UINT_TO_FP,
-
- /// SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to
- /// sign extend a small value in a large integer register (e.g. sign
- /// extending the low 8 bits of a 32-bit register to fill the top 24 bits
- /// with the 7th bit). The size of the smaller type is indicated by the 1th
- /// operand, a ValueType node.
- SIGN_EXTEND_INREG,
-
- /// ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an
- /// in-register any-extension of the low lanes of an integer vector. The
- /// result type must have fewer elements than the operand type, and those
- /// elements must be larger integer types such that the total size of the
- /// operand type is less than or equal to the size of the result type. Each
- /// of the low operand elements is any-extended into the corresponding,
- /// wider result elements with the high bits becoming undef.
- /// NOTE: The type legalizer prefers to make the operand and result size
- /// the same to allow expansion to shuffle vector during op legalization.
- ANY_EXTEND_VECTOR_INREG,
-
- /// SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an
- /// in-register sign-extension of the low lanes of an integer vector. The
- /// result type must have fewer elements than the operand type, and those
- /// elements must be larger integer types such that the total size of the
- /// operand type is less than or equal to the size of the result type. Each
- /// of the low operand elements is sign-extended into the corresponding,
- /// wider result elements.
- /// NOTE: The type legalizer prefers to make the operand and result size
- /// the same to allow expansion to shuffle vector during op legalization.
- SIGN_EXTEND_VECTOR_INREG,
-
- /// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an
- /// in-register zero-extension of the low lanes of an integer vector. The
- /// result type must have fewer elements than the operand type, and those
- /// elements must be larger integer types such that the total size of the
- /// operand type is less than or equal to the size of the result type. Each
- /// of the low operand elements is zero-extended into the corresponding,
- /// wider result elements.
- /// NOTE: The type legalizer prefers to make the operand and result size
- /// the same to allow expansion to shuffle vector during op legalization.
- ZERO_EXTEND_VECTOR_INREG,
-
- /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
- /// integer. These have the same semantics as fptosi and fptoui in IR. If
- /// the FP value cannot fit in the integer type, the results are undefined.
- FP_TO_SINT,
- FP_TO_UINT,
-
- /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
- /// down to the precision of the destination VT. TRUNC is a flag, which is
- /// always an integer that is zero or one. If TRUNC is 0, this is a
- /// normal rounding, if it is 1, this FP_ROUND is known to not change the
- /// value of Y.
- ///
- /// The TRUNC = 1 case is used in cases where we know that the value will
- /// not be modified by the node, because Y is not using any of the extra
- /// precision of source type. This allows certain transformations like
- /// FP_EXTEND(FP_ROUND(X,1)) -> X which are not safe for
- /// FP_EXTEND(FP_ROUND(X,0)) because the extra bits aren't removed.
- FP_ROUND,
-
- /// FLT_ROUNDS_ - Returns current rounding mode:
- /// -1 Undefined
- /// 0 Round to 0
- /// 1 Round to nearest
- /// 2 Round to +inf
- /// 3 Round to -inf
- FLT_ROUNDS_,
-
- /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and
- /// rounds it to a floating point value. It then promotes it and returns it
- /// in a register of the same size. This operation effectively just
- /// discards excess precision. The type to round down to is specified by
- /// the VT operand, a VTSDNode.
- FP_ROUND_INREG,
-
- /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
- FP_EXTEND,
-
- /// BITCAST - This operator converts between integer, vector and FP
- /// values, as if the value was stored to memory with one type and loaded
- /// from the same address with the other type (or equivalently for vector
- /// format conversions, etc). The source and result are required to have
- /// the same bit size (e.g. f32 <-> i32). This can also be used for
- /// int-to-int or fp-to-fp conversions, but that is a noop, deleted by
- /// getNode().
- ///
- /// This operator is subtly different from the bitcast instruction from
- /// LLVM-IR since this node may change the bits in the register. For
- /// example, this occurs on big-endian NEON and big-endian MSA where the
- /// layout of the bits in the register depends on the vector type and this
- /// operator acts as a shuffle operation for some vector type combinations.
- BITCAST,
-
- /// ADDRSPACECAST - This operator converts between pointers of different
- /// address spaces.
- ADDRSPACECAST,
-
- /// FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions
- /// and truncation for half-precision (16 bit) floating numbers. These nodes
- /// form a semi-softened interface for dealing with f16 (as an i16), which
- /// is often a storage-only type but has native conversions.
- FP16_TO_FP, FP_TO_FP16,
-
- /// Perform various unary floating-point operations inspired by libm. For
- /// FPOWI, the result is undefined if if the integer operand doesn't fit
- /// into 32 bits.
- FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
- FLOG, FLOG2, FLOG10, FEXP, FEXP2,
- FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
- LROUND, LLROUND, LRINT, LLRINT,
-
- /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
- /// values.
- //
- /// In the case where a single input is a NaN (either signaling or quiet),
- /// the non-NaN input is returned.
- ///
- /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
- FMINNUM, FMAXNUM,
-
- /// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
- /// two values, following the IEEE-754 2008 definition. This differs from
- /// FMINNUM/FMAXNUM in the handling of signaling NaNs. If one input is a
- /// signaling NaN, returns a quiet NaN.
- FMINNUM_IEEE, FMAXNUM_IEEE,
-
- /// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0
- /// as less than 0.0. While FMINNUM_IEEE/FMAXNUM_IEEE follow IEEE 754-2008
- /// semantics, FMINIMUM/FMAXIMUM follow IEEE 754-2018 draft semantics.
- FMINIMUM, FMAXIMUM,
-
- /// FSINCOS - Compute both fsin and fcos as a single operation.
- FSINCOS,
-
- /// LOAD and STORE have token chains as their first operand, then the same
- /// operands as an LLVM load/store instruction, then an offset node that
- /// is added / subtracted from the base pointer to form the address (for
- /// indexed memory ops).
- LOAD, STORE,
-
- /// DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned
- /// to a specified boundary. This node always has two return values: a new
- /// stack pointer value and a chain. The first operand is the token chain,
- /// the second is the number of bytes to allocate, and the third is the
- /// alignment boundary. The size is guaranteed to be a multiple of the
- /// stack alignment, and the alignment is guaranteed to be bigger than the
- /// stack alignment (if required) or 0 to get standard stack alignment.
- DYNAMIC_STACKALLOC,
-
- /// Control flow instructions. These all have token chains.
-
- /// BR - Unconditional branch. The first operand is the chain
- /// operand, the second is the MBB to branch to.
- BR,
-
- /// BRIND - Indirect branch. The first operand is the chain, the second
- /// is the value to branch to, which must be of the same type as the
- /// target's pointer type.
- BRIND,
-
- /// BR_JT - Jumptable branch. The first operand is the chain, the second
- /// is the jumptable index, the last one is the jumptable entry index.
- BR_JT,
-
- /// BRCOND - Conditional branch. The first operand is the chain, the
- /// second is the condition, the third is the block to branch to if the
- /// condition is true. If the type of the condition is not i1, then the
- /// high bits must conform to getBooleanContents.
- BRCOND,
-
- /// BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in
- /// that the condition is represented as condition code, and two nodes to
- /// compare, rather than as a combined SetCC node. The operands in order
- /// are chain, cc, lhs, rhs, block to branch to if condition is true.
- BR_CC,
-
- /// INLINEASM - Represents an inline asm block. This node always has two
- /// return values: a chain and a flag result. The inputs are as follows:
- /// Operand #0 : Input chain.
- /// Operand #1 : a ExternalSymbolSDNode with a pointer to the asm string.
- /// Operand #2 : a MDNodeSDNode with the !srcloc metadata.
- /// Operand #3 : HasSideEffect, IsAlignStack bits.
- /// After this, it is followed by a list of operands with this format:
- /// ConstantSDNode: Flags that encode whether it is a mem or not, the
- /// of operands that follow, etc. See InlineAsm.h.
- /// ... however many operands ...
- /// Operand #last: Optional, an incoming flag.
- ///
- /// The variable width operands are required to represent target addressing
- /// modes as a single "operand", even though they may have multiple
- /// SDOperands.
- INLINEASM,
-
- /// INLINEASM_BR - Terminator version of inline asm. Used by asm-goto.
- INLINEASM_BR,
-
- /// EH_LABEL - Represents a label in mid basic block used to track
- /// locations needed for debug and exception handling tables. These nodes
- /// take a chain as input and return a chain.
- EH_LABEL,
-
- /// ANNOTATION_LABEL - Represents a mid basic block label used by
- /// annotations. This should remain within the basic block and be ordered
- /// with respect to other call instructions, but loads and stores may float
- /// past it.
- ANNOTATION_LABEL,
-
- /// CATCHPAD - Represents a catchpad instruction.
- CATCHPAD,
-
- /// CATCHRET - Represents a return from a catch block funclet. Used for
- /// MSVC compatible exception handling. Takes a chain operand and a
- /// destination basic block operand.
- CATCHRET,
-
- /// CLEANUPRET - Represents a return from a cleanup block funclet. Used for
- /// MSVC compatible exception handling. Takes only a chain operand.
- CLEANUPRET,
-
- /// STACKSAVE - STACKSAVE has one operand, an input chain. It produces a
- /// value, the same type as the pointer type for the system, and an output
- /// chain.
- STACKSAVE,
-
- /// STACKRESTORE has two operands, an input chain and a pointer to restore
- /// to it returns an output chain.
- STACKRESTORE,
-
- /// CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end
- /// of a call sequence, and carry arbitrary information that target might
- /// want to know. The first operand is a chain, the rest are specified by
- /// the target and not touched by the DAG optimizers.
- /// Targets that may use stack to pass call arguments define additional
- /// operands:
- /// - size of the call frame part that must be set up within the
- /// CALLSEQ_START..CALLSEQ_END pair,
- /// - part of the call frame prepared prior to CALLSEQ_START.
- /// Both these parameters must be constants, their sum is the total call
- /// frame size.
- /// CALLSEQ_START..CALLSEQ_END pairs may not be nested.
- CALLSEQ_START, // Beginning of a call sequence
- CALLSEQ_END, // End of a call sequence
-
- /// VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE,
- /// and the alignment. It returns a pair of values: the vaarg value and a
- /// new chain.
- VAARG,
-
- /// VACOPY - VACOPY has 5 operands: an input chain, a destination pointer,
- /// a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the
- /// source.
- VACOPY,
-
- /// VAEND, VASTART - VAEND and VASTART have three operands: an input chain,
- /// pointer, and a SRCVALUE.
- VAEND, VASTART,
-
- /// SRCVALUE - This is a node type that holds a Value* that is used to
- /// make reference to a value in the LLVM IR.
- SRCVALUE,
-
- /// MDNODE_SDNODE - This is a node that holdes an MDNode*, which is used to
- /// reference metadata in the IR.
- MDNODE_SDNODE,
-
- /// PCMARKER - This corresponds to the pcmarker intrinsic.
- PCMARKER,
-
- /// READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
- /// It produces a chain and one i64 value. The only operand is a chain.
- /// If i64 is not legal, the result will be expanded into smaller values.
- /// Still, it returns an i64, so targets should set legality for i64.
- /// The result is the content of the architecture-specific cycle
- /// counter-like register (or other high accuracy low latency clock source).
- READCYCLECOUNTER,
-
- /// HANDLENODE node - Used as a handle for various purposes.
- HANDLENODE,
-
- /// INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic. It
- /// takes as input a token chain, the pointer to the trampoline, the pointer
- /// to the nested function, the pointer to pass for the 'nest' parameter, a
- /// SRCVALUE for the trampoline and another for the nested function
- /// (allowing targets to access the original Function*).
- /// It produces a token chain as output.
- INIT_TRAMPOLINE,
-
- /// ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
- /// It takes a pointer to the trampoline and produces a (possibly) new
- /// pointer to the same trampoline with platform-specific adjustments
- /// applied. The pointer it returns points to an executable block of code.
- ADJUST_TRAMPOLINE,
-
- /// TRAP - Trapping instruction
- TRAP,
-
- /// DEBUGTRAP - Trap intended to get the attention of a debugger.
- DEBUGTRAP,
-
- /// PREFETCH - This corresponds to a prefetch intrinsic. The first operand
- /// is the chain. The other operands are the address to prefetch,
- /// read / write specifier, locality specifier and instruction / data cache
- /// specifier.
- PREFETCH,
-
- /// OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope)
- /// This corresponds to the fence instruction. It takes an input chain, and
- /// two integer constants: an AtomicOrdering and a SynchronizationScope.
- ATOMIC_FENCE,
-
- /// Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr)
- /// This corresponds to "load atomic" instruction.
- ATOMIC_LOAD,
-
- /// OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val)
- /// This corresponds to "store atomic" instruction.
- ATOMIC_STORE,
-
- /// Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
- /// For double-word atomic operations:
- /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmpLo, cmpHi,
- /// swapLo, swapHi)
- /// This corresponds to the cmpxchg instruction.
- ATOMIC_CMP_SWAP,
-
- /// Val, Success, OUTCHAIN
- /// = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap)
- /// N.b. this is still a strong cmpxchg operation, so
- /// Success == "Val == cmp".
- ATOMIC_CMP_SWAP_WITH_SUCCESS,
-
- /// Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt)
- /// Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt)
- /// For double-word atomic operations:
- /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi)
- /// ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi)
- /// These correspond to the atomicrmw instruction.
- ATOMIC_SWAP,
- ATOMIC_LOAD_ADD,
- ATOMIC_LOAD_SUB,
- ATOMIC_LOAD_AND,
- ATOMIC_LOAD_CLR,
- ATOMIC_LOAD_OR,
- ATOMIC_LOAD_XOR,
- ATOMIC_LOAD_NAND,
- ATOMIC_LOAD_MIN,
- ATOMIC_LOAD_MAX,
- ATOMIC_LOAD_UMIN,
- ATOMIC_LOAD_UMAX,
- ATOMIC_LOAD_FADD,
- ATOMIC_LOAD_FSUB,
-
- // Masked load and store - consecutive vector load and store operations
- // with additional mask operand that prevents memory accesses to the
- // masked-off lanes.
- //
- // Val, OutChain = MLOAD(BasePtr, Mask, PassThru)
- // OutChain = MSTORE(Value, BasePtr, Mask)
- MLOAD, MSTORE,
-
- // Masked gather and scatter - load and store operations for a vector of
- // random addresses with additional mask operand that prevents memory
- // accesses to the masked-off lanes.
- //
- // Val, OutChain = GATHER(InChain, PassThru, Mask, BasePtr, Index, Scale)
- // OutChain = SCATTER(InChain, Value, Mask, BasePtr, Index, Scale)
- //
- // The Index operand can have more vector elements than the other operands
- // due to type legalization. The extra elements are ignored.
- MGATHER, MSCATTER,
-
- /// This corresponds to the llvm.lifetime.* intrinsics. The first operand
- /// is the chain and the second operand is the alloca pointer.
- LIFETIME_START, LIFETIME_END,
-
- /// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
- /// beginning and end of GC transition sequence, and carry arbitrary
- /// information that target might need for lowering. The first operand is
- /// a chain, the rest are specified by the target and not touched by the DAG
- /// optimizers. GC_TRANSITION_START..GC_TRANSITION_END pairs may not be
- /// nested.
- GC_TRANSITION_START,
- GC_TRANSITION_END,
-
- /// GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of
- /// the most recent dynamic alloca. For most targets that would be 0, but
- /// for some others (e.g. PowerPC, PowerPC64) that would be compile-time
- /// known nonzero constant. The only operand here is the chain.
- GET_DYNAMIC_AREA_OFFSET,
-
- /// Generic reduction nodes. These nodes represent horizontal vector
- /// reduction operations, producing a scalar result.
- /// The STRICT variants perform reductions in sequential order. The first
- /// operand is an initial scalar accumulator value, and the second operand
- /// is the vector to reduce.
- VECREDUCE_STRICT_FADD, VECREDUCE_STRICT_FMUL,
- /// These reductions are non-strict, and have a single vector operand.
- VECREDUCE_FADD, VECREDUCE_FMUL,
- /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
- VECREDUCE_FMAX, VECREDUCE_FMIN,
- /// Integer reductions may have a result type larger than the vector element
- /// type. However, the reduction is performed using the vector element type
- /// and the value in the top bits is unspecified.
- VECREDUCE_ADD, VECREDUCE_MUL,
- VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR,
- VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN,
-
- /// BUILTIN_OP_END - This must be the last enum value in this list.
- /// The target-specific pre-isel opcode values start here.
- BUILTIN_OP_END
- };
-
- /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations
- /// which do not reference a specific memory location should be less than
- /// this value. Those that do must not be less than this value, and can
- /// be used with SelectionDAG::getMemIntrinsicNode.
- static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+400;
-
- //===--------------------------------------------------------------------===//
- /// MemIndexedMode enum - This enum defines the load / store indexed
- /// addressing modes.
+ /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2
+ /// inserted into VECTOR1. IDX represents the starting element number at which
+ /// VECTOR2 will be inserted. Let the type of VECTOR2 be T; IDX must be a
+ /// constant multiple of T's known minimum vector length, and if T is a
+ /// scalable vector, IDX is first scaled by the runtime scaling factor of T.
+ /// The elements of VECTOR1 starting at IDX are overwritten with VECTOR2.
+ /// Elements IDX through (IDX + num_elements(T) - 1) must be valid VECTOR1
+ /// indices. If this condition cannot be determined statically but is false at
+ /// runtime, then the result vector is undefined.
///
- /// UNINDEXED "Normal" load / store. The effective address is already
- /// computed and is available in the base pointer. The offset
- /// operand is always undefined. In addition to producing a
- /// chain, an unindexed load produces one value (result of the
- /// load); an unindexed store does not produce a value.
+ /// This operation supports inserting a fixed-width vector into a scalable
+ /// vector, but not the other way around.
+ INSERT_SUBVECTOR,
+
+ /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
+ /// Let the result type be T, then IDX represents the starting element number
+ /// from which a subvector of type T is extracted. IDX must be a constant
+ /// multiple of T's known minimum vector length. If T is a scalable vector,
+ /// IDX is first scaled by the runtime scaling factor of T. Elements IDX
+ /// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this
+ /// condition cannot be determined statically but is false at runtime, then
+ /// the result vector is undefined. The IDX parameter must be a vector index
+ /// constant type, which for most targets will be an integer pointer type.
///
- /// PRE_INC Similar to the unindexed mode where the effective address is
- /// PRE_DEC the value of the base pointer add / subtract the offset.
- /// It considers the computation as being folded into the load /
- /// store operation (i.e. the load / store does the address
- /// computation as well as performing the memory transaction).
- /// The base operand is always undefined. In addition to
- /// producing a chain, pre-indexed load produces two values
- /// (result of the load and the result of the address
- /// computation); a pre-indexed store produces one value (result
- /// of the address computation).
+ /// This operation supports extracting a fixed-width vector from a scalable
+ /// vector, but not the other way around.
+ EXTRACT_SUBVECTOR,
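
A fixed-width emulation of both subvector operations under the documented index rule (IDX a constant multiple of the subvector length); the scalable-vector scaling is not modelled, and the helper names are mine:

    #include <array>
    #include <cassert>
    #include <cstddef>

    // INSERT_SUBVECTOR: overwrite elements [idx, idx + M) of v1 with v2.
    template <std::size_t N, std::size_t M>
    void insertSubvector(std::array<int, N> &v1, const std::array<int, M> &v2,
                         std::size_t idx) {
      assert(idx % M == 0 && idx + M <= N); // otherwise the result is undefined
      for (std::size_t i = 0; i < M; ++i)
        v1[idx + i] = v2[i];
    }

    // EXTRACT_SUBVECTOR: read elements [idx, idx + M) out of v.
    template <std::size_t M, std::size_t N>
    std::array<int, M> extractSubvector(const std::array<int, N> &v,
                                        std::size_t idx) {
      assert(idx % M == 0 && idx + M <= N);
      std::array<int, M> out;
      for (std::size_t i = 0; i < M; ++i)
        out[i] = v[idx + i];
      return out;
    }

    int main() {
      std::array<int, 4> v = {0, 0, 0, 0};
      insertSubvector(v, std::array<int, 2>{7, 8}, 2);
      assert((extractSubvector<2>(v, 2) == std::array<int, 2>{7, 8}));
      return 0;
    }
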
+
+ /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
+ /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
+ /// values that indicate which value (or undef) each result element will
+ /// get. These constant ints are accessible through the
+ /// ShuffleVectorSDNode class. This is quite similar to the Altivec
+ /// 'vperm' instruction, except that the indices must be constants and are
+ /// in terms of the element size of VEC1/VEC2, not in terms of bytes.
+ VECTOR_SHUFFLE,
+
+ /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
+ /// scalar value into element 0 of the resultant vector type. The top
+ /// elements 1 to N-1 of the N-element vector are undefined. The type
+ /// of the operand must match the vector element type, except when they
+ /// are integer types. In this case the operand is allowed to be wider
+ /// than the vector element type, and is implicitly truncated to it.
+ SCALAR_TO_VECTOR,
+
+ /// SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL
+ /// duplicated in all lanes. The type of the operand must match the vector
+ /// element type, except when they are integer types. In this case the
+ /// operand is allowed to be wider than the vector element type, and is
+ /// implicitly truncated to it.
+ SPLAT_VECTOR,
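
A scalar sketch of the splat-plus-truncation rule, assuming a wider i32 operand splatted into a v4i8:

    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t val = 0x1FF; // wider than the i8 element type
      std::array<uint8_t, 4> v;
      v.fill(static_cast<uint8_t>(val)); // implicitly truncated to 0xFF per lane
      assert(v[0] == 0xFF && v[3] == 0xFF);
      return 0;
    }
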
+
+ /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
+ /// producing an unsigned/signed value of type i[2*N], then return the top
+ /// part.
+ MULHU,
+ MULHS,
+
+ /// [US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned
+ /// integers.
+ SMIN,
+ SMAX,
+ UMIN,
+ UMAX,
+
+ /// Bitwise operators - logical and, logical or, logical xor.
+ AND,
+ OR,
+ XOR,
+
+ /// ABS - Determine the unsigned absolute value of a signed integer value of
+ /// the same bitwidth.
+ /// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
+ /// is performed.
+ ABS,
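
A standalone check of the INT_MIN corner case called out above (a sketch; the signed reinterpretation assumes C++20's two's-complement conversion rule):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Compute |x| in the unsigned domain, then reinterpret as signed,
      // matching the no-saturation rule: ABS(INT_MIN) == INT_MIN.
      int32_t x = INT32_MIN;
      uint32_t mag =
          x < 0 ? 0u - static_cast<uint32_t>(x) : static_cast<uint32_t>(x);
      assert(static_cast<int32_t>(mag) == INT32_MIN);
      return 0;
    }
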
+
+ /// Shift and rotation operations. After legalization, the type of the
+ /// shift amount is known to be TLI.getShiftAmountTy(). Before legalization
+ /// the shift amount can be any type, but care must be taken to ensure it is
+ /// large enough. TLI.getShiftAmountTy() is i8 on some targets, but before
+ /// legalization, types like i1024 can occur and i8 doesn't have enough bits
+ /// to represent the shift amount.
+ /// When the 1st operand is a vector, the shift amount must have the same
+ /// type. (TLI.getShiftAmountTy() will return the same type when the input
+ /// type is a vector.)
+ /// For rotates and funnel shifts, the shift amount is treated as an unsigned
+ /// amount modulo the element size of the first operand.
///
- /// POST_INC The effective address is the value of the base pointer. The
- /// POST_DEC value of the offset operand is then added to / subtracted
- /// from the base after memory transaction. In addition to
- /// producing a chain, post-indexed load produces two values
- /// (the result of the load and the result of the base +/- offset
- /// computation); a post-indexed store produces one value (the
- /// the result of the base +/- offset computation).
- enum MemIndexedMode {
- UNINDEXED = 0,
- PRE_INC,
- PRE_DEC,
- POST_INC,
- POST_DEC
- };
+ /// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
+ /// fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ /// fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SHL,
+ SRA,
+ SRL,
+ ROTL,
+ ROTR,
+ FSHL,
+ FSHR,
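
The funnel-shift formulas above translate to C++ almost directly; the one wrinkle in a sketch is that Z % BW == 0 would make the second shift count equal to BW, which is undefined in C++, so a guard is needed:

    #include <cassert>
    #include <cstdint>

    uint32_t fshl32(uint32_t x, uint32_t y, uint32_t z) {
      unsigned s = z % 32; // shift amount taken modulo the element size
      return s == 0 ? x : (x << s) | (y >> (32 - s));
    }

    uint32_t fshr32(uint32_t x, uint32_t y, uint32_t z) {
      unsigned s = z % 32;
      return s == 0 ? y : (x << (32 - s)) | (y >> s);
    }

    int main() {
      assert(fshl32(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au);
      assert(fshr32(0x12345678u, 0x9ABCDEF0u, 8) == 0x789ABCDEu);
      return 0;
    }
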
- static const int LAST_INDEXED_MODE = POST_DEC + 1;
+ /// Byte Swap and Counting operators.
+ BSWAP,
+ CTTZ,
+ CTLZ,
+ CTPOP,
+ BITREVERSE,
+ PARITY,
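
PARITY is new in this drop and carries no comment here; its conventional meaning, assumed below, is the low bit of the population count (1 when an odd number of bits are set):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    // Assumed semantics: PARITY(x) == CTPOP(x) & 1.
    unsigned parity32(uint32_t x) { return std::bitset<32>(x).count() & 1u; }

    int main() {
      assert(parity32(0b1011) == 1); // three bits set: odd
      assert(parity32(0b1001) == 0); // two bits set: even
      return 0;
    }
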
- //===--------------------------------------------------------------------===//
- /// LoadExtType enum - This enum defines the three variants of LOADEXT
- /// (load with extension).
+ /// Bit counting operators with an undefined result for zero inputs.
+ CTTZ_ZERO_UNDEF,
+ CTLZ_ZERO_UNDEF,
+
+ /// Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not
+ /// i1 then the high bits must conform to getBooleanContents.
+ SELECT,
+
+ /// Select with a vector condition (op #0) and two vector operands (ops #1
+ /// and #2), returning a vector result. All vectors have the same length.
+ /// Much like the scalar select and setcc, each bit in the condition selects
+ /// whether the corresponding result element is taken from op #1 or op #2.
+ /// At first, the VSELECT condition is of vXi1 type. Later, targets may
+ /// change the condition type in order to match the VSELECT node using a
+ /// pattern. The condition follows the BooleanContent format of the target.
+ VSELECT,
+
+ /// Select with condition operator - This selects between a true value and
+ /// a false value (ops #2 and #3) based on the boolean result of comparing
+ /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
+ /// condition code in op #4, a CondCodeSDNode.
+ SELECT_CC,
+
+ /// SetCC operator - This evaluates to a true value iff the condition is
+ /// true. If the result value type is not i1 then the high bits conform
+ /// to getBooleanContents. The operands to this are the left and right
+ /// operands to compare (ops #0, and #1) and the condition code to compare
+ /// them with (op #2) as a CondCodeSDNode. If the operands are vector types
+ /// then the result type must also be a vector type.
+ SETCC,
+
+ /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but
+ /// op #2 is a boolean indicating if there is an incoming carry. This
+ /// operator checks the result of "LHS - RHS - Carry", and can be used to
+ /// compare two wide integers:
+ /// (setcccarry lhshi rhshi (subcarry lhslo rhslo) cc).
+ /// Only valid for integers.
+ SETCCCARRY,
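
A sketch of the wide-compare idiom the comment describes, with the borrow out of the low halves standing in for the subcarry result (the helper is hypothetical, not the DAG node):

    #include <cassert>
    #include <cstdint>

    // Unsigned less-than on a 64-bit value split into 32-bit halves:
    // (setcccarry lhshi rhshi borrow setult), where borrow = (lhslo < rhslo).
    bool ult64(uint32_t lhi, uint32_t llo, uint32_t rhi, uint32_t rlo) {
      bool borrow = llo < rlo;                    // borrow out of the low subtract
      return lhi < rhi || (lhi == rhi && borrow); // "LHS - RHS - Carry" underflows
    }

    int main() {
      assert(ult64(0, 5, 0, 9));  // 5 < 9
      assert(!ult64(1, 0, 0, 9)); // 0x100000000 > 9
      assert(ult64(1, 0, 1, 1));  // equal high halves: borrow decides
      return 0;
    }
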
+
+ /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded
+ /// integer shift operations. The operation ordering is:
+ /// [Lo,Hi] = op [LoLHS,HiLHS], Amt
+ SHL_PARTS,
+ SRA_PARTS,
+ SRL_PARTS,
+
+ /// Conversion operators. These are all single input single output
+ /// operations. For all of these, the result type must be strictly
+ /// wider or narrower (depending on the operation) than the source
+ /// type.
+
+ /// SIGN_EXTEND - Used for integer types, replicating the sign bit
+ /// into new bits.
+ SIGN_EXTEND,
+
+ /// ZERO_EXTEND - Used for integer types, zeroing the new bits.
+ ZERO_EXTEND,
+
+ /// ANY_EXTEND - Used for integer types. The high bits are undefined.
+ ANY_EXTEND,
+
+ /// TRUNCATE - Completely drop the high bits.
+ TRUNCATE,
+
+ /// [SU]INT_TO_FP - These operators convert integers (whose interpreted sign
+ /// depends on the first letter) to floating point.
+ SINT_TO_FP,
+ UINT_TO_FP,
+
+ /// SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to
+ /// sign extend a small value in a large integer register (e.g. sign
+ /// extending the low 8 bits of a 32-bit register to fill the top 24 bits
+ /// with the 7th bit). The size of the smaller type is indicated by the 1st
+ /// operand, a ValueType node.
+ SIGN_EXTEND_INREG,
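
The SHL/SRA pair is easy to reproduce for a concrete width; this sketch sign-extends the low 8 bits of an i32, assuming arithmetic right shift of signed values (guaranteed since C++20):

    #include <cassert>
    #include <cstdint>

    int32_t signExtendInReg8(int32_t v) {
      // SHL by 24 then SRA by 24 replicates bit 7 into the top 24 bits.
      return static_cast<int32_t>(static_cast<uint32_t>(v) << 24) >> 24;
    }

    int main() {
      assert(signExtendInReg8(0x000000FF) == -1);
      assert(signExtendInReg8(0x0000017F) == 127); // bits above bit 7 are ignored
      return 0;
    }
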
+
+ /// ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an
+ /// in-register any-extension of the low lanes of an integer vector. The
+ /// result type must have fewer elements than the operand type, and those
+ /// elements must be larger integer types such that the total size of the
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is any-extended into the corresponding,
+ /// wider result elements with the high bits becoming undef.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
+ ANY_EXTEND_VECTOR_INREG,
+
+ /// SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an
+ /// in-register sign-extension of the low lanes of an integer vector. The
+ /// result type must have fewer elements than the operand type, and those
+ /// elements must be larger integer types such that the total size of the
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is sign-extended into the corresponding,
+ /// wider result elements.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
+ SIGN_EXTEND_VECTOR_INREG,
+
+ /// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an
+ /// in-register zero-extension of the low lanes of an integer vector. The
+ /// result type must have fewer elements than the operand type, and those
+ /// elements must be larger integer types such that the total size of the
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is zero-extended into the corresponding,
+ /// wider result elements.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
+ ZERO_EXTEND_VECTOR_INREG,
+
+ /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
+ /// integer. These have the same semantics as fptosi and fptoui in IR. If
+ /// the FP value cannot fit in the integer type, the results are undefined.
+ FP_TO_SINT,
+ FP_TO_UINT,
+
+ /// FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a
+ /// signed or unsigned integer type with the bit width given in operand 1 with
+ /// the following semantics:
///
- /// SEXTLOAD loads the integer operand and sign extends it to a larger
- /// integer result type.
- /// ZEXTLOAD loads the integer operand and zero extends it to a larger
- /// integer result type.
- /// EXTLOAD is used for two things: floating point extending loads and
- /// integer extending loads [the top bits are undefined].
- enum LoadExtType {
- NON_EXTLOAD = 0,
- EXTLOAD,
- SEXTLOAD,
- ZEXTLOAD
- };
-
- static const int LAST_LOADEXT_TYPE = ZEXTLOAD + 1;
-
- NodeType getExtForLoadExtType(bool IsFP, LoadExtType);
-
- //===--------------------------------------------------------------------===//
- /// ISD::CondCode enum - These are ordered carefully to make the bitfields
- /// below work out, when considering SETFALSE (something that never exists
- /// dynamically) as 0. "U" -> Unsigned (for integer operands) or Unordered
- /// (for floating point), "L" -> Less than, "G" -> Greater than, "E" -> Equal
- /// to. If the "N" column is 1, the result of the comparison is undefined if
- /// the input is a NAN.
+ /// * If the value is NaN, zero is returned.
+ /// * If the value is larger/smaller than the largest/smallest integer,
+ /// the largest/smallest integer is returned (saturation).
+ /// * Otherwise the result of rounding the value towards zero is returned.
///
- /// All of these (except for the 'always folded ops') should be handled for
- /// floating point. For integer, only the SETEQ,SETNE,SETLT,SETLE,SETGT,
- /// SETGE,SETULT,SETULE,SETUGT, and SETUGE opcodes are used.
+ /// The width given in operand 1 must be equal to, or smaller than, the scalar
+ /// result type width. It may end up being smaller than the result width as a
+ /// result of integer type legalization.
+ FP_TO_SINT_SAT,
+ FP_TO_UINT_SAT,
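
The three bullet points translate into a small scalar model for an i32 result (a sketch of the documented semantics, not the lowering):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    int32_t fpToSIntSat32(double x) {
      if (std::isnan(x)) return 0;              // NaN -> zero
      if (x >= 2147483648.0) return INT32_MAX;  // saturate high
      if (x <= -2147483649.0) return INT32_MIN; // saturate low
      return static_cast<int32_t>(x);           // otherwise round toward zero
    }

    int main() {
      assert(fpToSIntSat32(std::nan("")) == 0);
      assert(fpToSIntSat32(1e18) == INT32_MAX);
      assert(fpToSIntSat32(-2.7) == -2);
      return 0;
    }
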
+
+ /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
+ /// down to the precision of the destination VT. TRUNC is a flag, which is
+ /// always an integer that is zero or one. If TRUNC is 0, this is a
+ /// normal rounding; if it is 1, this FP_ROUND is known to not change the
+ /// value of Y.
///
- /// Note that these are laid out in a specific order to allow bit-twiddling
- /// to transform conditions.
- enum CondCode {
- // Opcode N U L G E Intuitive operation
- SETFALSE, // 0 0 0 0 Always false (always folded)
- SETOEQ, // 0 0 0 1 True if ordered and equal
- SETOGT, // 0 0 1 0 True if ordered and greater than
- SETOGE, // 0 0 1 1 True if ordered and greater than or equal
- SETOLT, // 0 1 0 0 True if ordered and less than
- SETOLE, // 0 1 0 1 True if ordered and less than or equal
- SETONE, // 0 1 1 0 True if ordered and operands are unequal
- SETO, // 0 1 1 1 True if ordered (no nans)
- SETUO, // 1 0 0 0 True if unordered: isnan(X) | isnan(Y)
- SETUEQ, // 1 0 0 1 True if unordered or equal
- SETUGT, // 1 0 1 0 True if unordered or greater than
- SETUGE, // 1 0 1 1 True if unordered, greater than, or equal
- SETULT, // 1 1 0 0 True if unordered or less than
- SETULE, // 1 1 0 1 True if unordered, less than, or equal
- SETUNE, // 1 1 1 0 True if unordered or not equal
- SETTRUE, // 1 1 1 1 Always true (always folded)
- // Don't care operations: undefined if the input is a nan.
- SETFALSE2, // 1 X 0 0 0 Always false (always folded)
- SETEQ, // 1 X 0 0 1 True if equal
- SETGT, // 1 X 0 1 0 True if greater than
- SETGE, // 1 X 0 1 1 True if greater than or equal
- SETLT, // 1 X 1 0 0 True if less than
- SETLE, // 1 X 1 0 1 True if less than or equal
- SETNE, // 1 X 1 1 0 True if not equal
- SETTRUE2, // 1 X 1 1 1 Always true (always folded)
+ /// The TRUNC = 1 case is used in cases where we know that the value will
+ /// not be modified by the node, because Y is not using any of the extra
+ /// precision of the source type. This allows certain transformations like
+ /// FP_EXTEND(FP_ROUND(X,1)) -> X which are not safe for
+ /// FP_EXTEND(FP_ROUND(X,0)) because the extra bits aren't removed.
+ FP_ROUND,
- SETCC_INVALID // Marker value.
- };
+ /// FLT_ROUNDS_ - Returns current rounding mode:
+ /// -1 Undefined
+ /// 0 Round to 0
+ /// 1 Round to nearest
+ /// 2 Round to +inf
+ /// 3 Round to -inf
+ /// Result is rounding mode and chain. Input is a chain.
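+ ///
+ /// (Illustrative note, not in the upstream header: this encoding matches
+ /// the C standard FLT_ROUNDS macro from <float.h>.)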
+ FLT_ROUNDS_,
- /// Return true if this is a setcc instruction that performs a signed
- /// comparison when used with integer operands.
- inline bool isSignedIntSetCC(CondCode Code) {
- return Code == SETGT || Code == SETGE || Code == SETLT || Code == SETLE;
- }
+ /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
+ FP_EXTEND,
- /// Return true if this is a setcc instruction that performs an unsigned
- /// comparison when used with integer operands.
- inline bool isUnsignedIntSetCC(CondCode Code) {
- return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE;
- }
+ /// BITCAST - This operator converts between integer, vector and FP
+ /// values, as if the value was stored to memory with one type and loaded
+ /// from the same address with the other type (or equivalently for vector
+ /// format conversions, etc). The source and result are required to have
+ /// the same bit size (e.g. f32 <-> i32). This can also be used for
+ /// int-to-int or fp-to-fp conversions, but that is a noop, deleted by
+ /// getNode().
+ ///
+ /// This operator is subtly different from the bitcast instruction from
+ /// LLVM-IR since this node may change the bits in the register. For
+ /// example, this occurs on big-endian NEON and big-endian MSA where the
+ /// layout of the bits in the register depends on the vector type and this
+ /// operator acts as a shuffle operation for some vector type combinations.
+ BITCAST,
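+
+ // Illustrative only: where no cross-lane reshuffling is involved, BITCAST
+ // behaves like this store/reload sketch (here f32 <-> i32):
+ //   uint32_t bitcast_f32_to_i32(float F) {
+ //     uint32_t I;
+ //     std::memcpy(&I, &F, sizeof(I)); // Same bits, reinterpreted type.
+ //     return I;
+ //   }
+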
- /// Return true if the specified condition returns true if the two operands to
- /// the condition are equal. Note that if one of the two operands is a NaN,
- /// this value is meaningless.
- inline bool isTrueWhenEqual(CondCode Cond) {
- return ((int)Cond & 1) != 0;
- }
+ /// ADDRSPACECAST - This operator converts between pointers of different
+ /// address spaces.
+ ADDRSPACECAST,
- /// This function returns 0 if the condition is always false if an operand is
- /// a NaN, 1 if the condition is always true if the operand is a NaN, and 2 if
- /// the condition is undefined if the operand is a NaN.
- inline unsigned getUnorderedFlavor(CondCode Cond) {
- return ((int)Cond >> 3) & 3;
- }
+ /// FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions
+ /// and truncation for half-precision (16 bit) floating-point numbers. These nodes
+ /// form a semi-softened interface for dealing with f16 (as an i16), which
+ /// is often a storage-only type but has native conversions.
+ FP16_TO_FP,
+ FP_TO_FP16,
+ STRICT_FP16_TO_FP,
+ STRICT_FP_TO_FP16,
- /// Return the operation corresponding to !(X op Y), where 'op' is a valid
- /// SetCC operation.
- CondCode getSetCCInverse(CondCode Operation, bool isInteger);
+ /// Perform various unary floating-point operations inspired by libm. For
+ /// FPOWI, the result is undefined if the integer operand doesn't fit
+ /// into 32 bits.
+ FNEG,
+ FABS,
+ FSQRT,
+ FCBRT,
+ FSIN,
+ FCOS,
+ FPOWI,
+ FPOW,
+ FLOG,
+ FLOG2,
+ FLOG10,
+ FEXP,
+ FEXP2,
+ FCEIL,
+ FTRUNC,
+ FRINT,
+ FNEARBYINT,
+ FROUND,
+ FROUNDEVEN,
+ FFLOOR,
+ LROUND,
+ LLROUND,
+ LRINT,
+ LLRINT,
- /// Return the operation corresponding to (Y op X) when given the operation
- /// for (X op Y).
- CondCode getSetCCSwappedOperands(CondCode Operation);
+ /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
+ /// values.
+ ///
+ /// In the case where a single input is a NaN (either signaling or quiet),
+ /// the non-NaN input is returned.
+ ///
+ /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
+ FMINNUM,
+ FMAXNUM,
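+
+ // Illustrative scalar sketch of the FMINNUM behavior described above (not
+ // part of the upstream header):
+ //   double fminnum(double X, double Y) {
+ //     if (std::isnan(X)) return Y; // One NaN input: return the other.
+ //     if (std::isnan(Y)) return X;
+ //     return X < Y ? X : Y;        // (0.0, -0.0) may return either.
+ //   }
+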
- /// Return the result of a logical OR between different comparisons of
- /// identical values: ((X op1 Y) | (X op2 Y)). This function returns
- /// SETCC_INVALID if it is not possible to represent the resultant comparison.
- CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger);
+ /// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
+ /// two values, following the IEEE-754 2008 definition. This differs from
+ /// FMINNUM/FMAXNUM in the handling of signaling NaNs. If one input is a
+ /// signaling NaN, returns a quiet NaN.
+ FMINNUM_IEEE,
+ FMAXNUM_IEEE,
- /// Return the result of a logical AND between different comparisons of
- /// identical values: ((X op1 Y) & (X op2 Y)). This function returns
- /// SETCC_INVALID if it is not possible to represent the resultant comparison.
- CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger);
+ /// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0
+ /// as less than 0.0. While FMINNUM_IEEE/FMAXNUM_IEEE follow IEEE 754-2008
+ /// semantics, FMINIMUM/FMAXIMUM follow IEEE 754-2018 draft semantics.
+ FMINIMUM,
+ FMAXIMUM,
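+
+ // Illustrative scalar sketch of FMINIMUM (not part of the upstream
+ // header): NaNs propagate, and -0.0 orders below +0.0:
+ //   double fminimum(double X, double Y) {
+ //     if (std::isnan(X) || std::isnan(Y))
+ //       return std::numeric_limits<double>::quiet_NaN(); // Propagate NaN.
+ //     if (X == 0.0 && Y == 0.0)
+ //       return std::signbit(X) ? X : Y; // Prefer -0.0 as the minimum.
+ //     return X < Y ? X : Y;
+ //   }
+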
-} // end llvm::ISD namespace
+ /// FSINCOS - Compute both fsin and fcos as a single operation.
+ FSINCOS,
-} // end llvm namespace
+ /// LOAD and STORE have token chains as their first operand, then the same
+ /// operands as an LLVM load/store instruction, then an offset node that
+ /// is added / subtracted from the base pointer to form the address (for
+ /// indexed memory ops).
+ LOAD,
+ STORE,
+
+ /// DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned
+ /// to a specified boundary. This node always has two return values: a new
+ /// stack pointer value and a chain. The first operand is the token chain,
+ /// the second is the number of bytes to allocate, and the third is the
+ /// alignment boundary. The size is guaranteed to be a multiple of the
+ /// stack alignment, and the alignment is guaranteed to be bigger than the
+ /// stack alignment (if required) or 0 to get standard stack alignment.
+ DYNAMIC_STACKALLOC,
+
+ /// Control flow instructions. These all have token chains.
+
+ /// BR - Unconditional branch. The first operand is the chain
+ /// operand, the second is the MBB to branch to.
+ BR,
+
+ /// BRIND - Indirect branch. The first operand is the chain, the second
+ /// is the value to branch to, which must be of the same type as the
+ /// target's pointer type.
+ BRIND,
+
+ /// BR_JT - Jumptable branch. The first operand is the chain, the second
+ /// is the jumptable index, the last one is the jumptable entry index.
+ BR_JT,
+
+ /// BRCOND - Conditional branch. The first operand is the chain, the
+ /// second is the condition, the third is the block to branch to if the
+ /// condition is true. If the type of the condition is not i1, then the
+ /// high bits must conform to getBooleanContents. If the condition is undef,
+ /// it nondeterministically jumps to the block.
+ /// TODO: Its semantics w.r.t. undef require further discussion; we need to
+ /// make sure that it is consistent with optimizations in MIR and the
+ /// meaning of IMPLICIT_DEF. See https://reviews.llvm.org/D92015
+ BRCOND,
+
+ /// BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in
+ /// that the condition is represented as condition code, and two nodes to
+ /// compare, rather than as a combined SetCC node. The operands in order
+ /// are chain, cc, lhs, rhs, block to branch to if condition is true. If
+ /// condition is undef, it nondeterministically jumps to the block.
+ BR_CC,
+
+ /// INLINEASM - Represents an inline asm block. This node always has two
+ /// return values: a chain and a flag result. The inputs are as follows:
+ /// Operand #0 : Input chain.
+ /// Operand #1 : an ExternalSymbolSDNode with a pointer to the asm string.
+ /// Operand #2 : a MDNodeSDNode with the !srcloc metadata.
+ /// Operand #3 : HasSideEffect, IsAlignStack bits.
+ /// After this, it is followed by a list of operands with this format:
+ /// ConstantSDNode: Flags that encode whether it is a mem or not, the
+ /// number of operands that follow, etc. See InlineAsm.h.
+ /// ... however many operands ...
+ /// Operand #last: Optional, an incoming flag.
+ ///
+ /// The variable width operands are required to represent target addressing
+ /// modes as a single "operand", even though they may have multiple
+ /// SDOperands.
+ INLINEASM,
+
+ /// INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
+ INLINEASM_BR,
+
+ /// EH_LABEL - Represents a label in mid basic block used to track
+ /// locations needed for debug and exception handling tables. These nodes
+ /// take a chain as input and return a chain.
+ EH_LABEL,
+
+ /// ANNOTATION_LABEL - Represents a mid basic block label used by
+ /// annotations. This should remain within the basic block and be ordered
+ /// with respect to other call instructions, but loads and stores may float
+ /// past it.
+ ANNOTATION_LABEL,
+
+ /// CATCHRET - Represents a return from a catch block funclet. Used for
+ /// MSVC compatible exception handling. Takes a chain operand and a
+ /// destination basic block operand.
+ CATCHRET,
+
+ /// CLEANUPRET - Represents a return from a cleanup block funclet. Used for
+ /// MSVC compatible exception handling. Takes only a chain operand.
+ CLEANUPRET,
+
+ /// STACKSAVE - STACKSAVE has one operand, an input chain. It produces a
+ /// value, the same type as the pointer type for the system, and an output
+ /// chain.
+ STACKSAVE,
+
+ /// STACKRESTORE has two operands, an input chain and a pointer to restore
+ /// to; it returns an output chain.
+ STACKRESTORE,
+
+ /// CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end
+ /// of a call sequence, and carry arbitrary information that target might
+ /// want to know. The first operand is a chain, the rest are specified by
+ /// the target and not touched by the DAG optimizers.
+ /// Targets that may use stack to pass call arguments define additional
+ /// operands:
+ /// - size of the call frame part that must be set up within the
+ /// CALLSEQ_START..CALLSEQ_END pair,
+ /// - part of the call frame prepared prior to CALLSEQ_START.
+ /// Both these parameters must be constants, their sum is the total call
+ /// frame size.
+ /// CALLSEQ_START..CALLSEQ_END pairs may not be nested.
+ CALLSEQ_START, // Beginning of a call sequence
+ CALLSEQ_END, // End of a call sequence
+
+ /// VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE,
+ /// and the alignment. It returns a pair of values: the vaarg value and a
+ /// new chain.
+ VAARG,
+
+ /// VACOPY - VACOPY has 5 operands: an input chain, a destination pointer,
+ /// a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the
+ /// source.
+ VACOPY,
+
+ /// VAEND, VASTART - VAEND and VASTART have three operands: an input chain,
+ /// pointer, and a SRCVALUE.
+ VAEND,
+ VASTART,
+
+ // PREALLOCATED_SETUP - This has 2 operands: an input chain and a SRCVALUE
+ // with the preallocated call Value.
+ PREALLOCATED_SETUP,
+ // PREALLOCATED_ARG - This has 3 operands: an input chain, a SRCVALUE
+ // with the preallocated call Value, and a constant int.
+ PREALLOCATED_ARG,
+
+ /// SRCVALUE - This is a node type that holds a Value* that is used to
+ /// make reference to a value in the LLVM IR.
+ SRCVALUE,
+
+ /// MDNODE_SDNODE - This is a node that holds an MDNode*, which is used to
+ /// reference metadata in the IR.
+ MDNODE_SDNODE,
+
+ /// PCMARKER - This corresponds to the pcmarker intrinsic.
+ PCMARKER,
+
+ /// READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
+ /// It produces a chain and one i64 value. The only operand is a chain.
+ /// If i64 is not legal, the result will be expanded into smaller values.
+ /// Still, it returns an i64, so targets should set legality for i64.
+ /// The result is the content of the architecture-specific cycle
+ /// counter-like register (or other high accuracy low latency clock source).
+ READCYCLECOUNTER,
+
+ /// HANDLENODE node - Used as a handle for various purposes.
+ HANDLENODE,
+
+ /// INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic. It
+ /// takes as input a token chain, the pointer to the trampoline, the pointer
+ /// to the nested function, the pointer to pass for the 'nest' parameter, a
+ /// SRCVALUE for the trampoline and another for the nested function
+ /// (allowing targets to access the original Function*).
+ /// It produces a token chain as output.
+ INIT_TRAMPOLINE,
+
+ /// ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
+ /// It takes a pointer to the trampoline and produces a (possibly) new
+ /// pointer to the same trampoline with platform-specific adjustments
+ /// applied. The pointer it returns points to an executable block of code.
+ ADJUST_TRAMPOLINE,
+
+ /// TRAP - Trapping instruction
+ TRAP,
+
+ /// DEBUGTRAP - Trap intended to get the attention of a debugger.
+ DEBUGTRAP,
+
+ /// UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
+ UBSANTRAP,
+
+ /// PREFETCH - This corresponds to a prefetch intrinsic. The first operand
+ /// is the chain. The other operands are the address to prefetch,
+ /// read / write specifier, locality specifier and instruction / data cache
+ /// specifier.
+ PREFETCH,
+
+ /// OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope)
+ /// This corresponds to the fence instruction. It takes an input chain, and
+ /// two integer constants: an AtomicOrdering and a SynchronizationScope.
+ ATOMIC_FENCE,
+
+ /// Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr)
+ /// This corresponds to "load atomic" instruction.
+ ATOMIC_LOAD,
+
+ /// OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val)
+ /// This corresponds to "store atomic" instruction.
+ ATOMIC_STORE,
+
+ /// Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
+ /// For double-word atomic operations:
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmpLo, cmpHi,
+ /// swapLo, swapHi)
+ /// This corresponds to the cmpxchg instruction.
+ ATOMIC_CMP_SWAP,
+
+ /// Val, Success, OUTCHAIN
+ /// = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap)
+ /// N.b. this is still a strong cmpxchg operation, so
+ /// Success == "Val == cmp".
+ ATOMIC_CMP_SWAP_WITH_SUCCESS,
+
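+ // Illustrative only: the relationship between the results, as a scalar
+ // sketch (strong cmpxchg, so Success cannot fail spuriously):
+ //   T Old = *Ptr;
+ //   bool Success = (Old == Cmp);
+ //   if (Success)
+ //     *Ptr = Swap;
+ //   // Results: Val = Old, Success, and the output chain.
+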
+ /// Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt)
+ /// Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt)
+ /// For double-word atomic operations:
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi)
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi)
+ /// These correspond to the atomicrmw instruction.
+ ATOMIC_SWAP,
+ ATOMIC_LOAD_ADD,
+ ATOMIC_LOAD_SUB,
+ ATOMIC_LOAD_AND,
+ ATOMIC_LOAD_CLR,
+ ATOMIC_LOAD_OR,
+ ATOMIC_LOAD_XOR,
+ ATOMIC_LOAD_NAND,
+ ATOMIC_LOAD_MIN,
+ ATOMIC_LOAD_MAX,
+ ATOMIC_LOAD_UMIN,
+ ATOMIC_LOAD_UMAX,
+ ATOMIC_LOAD_FADD,
+ ATOMIC_LOAD_FSUB,
+
+ // Masked load and store - consecutive vector load and store operations
+ // with additional mask operand that prevents memory accesses to the
+ // masked-off lanes.
+ //
+ // Val, OutChain = MLOAD(BasePtr, Mask, PassThru)
+ // OutChain = MSTORE(Value, BasePtr, Mask)
+ MLOAD,
+ MSTORE,
+
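+ // Illustrative only: per-lane MLOAD/MSTORE semantics as scalar loops
+ // (PassThru supplies the masked-off lanes of the result):
+ //   for (unsigned i = 0; i != NumElts; ++i)        // MLOAD
+ //     Val[i] = Mask[i] ? BasePtr[i] : PassThru[i]; // No access if masked.
+ //   for (unsigned i = 0; i != NumElts; ++i)        // MSTORE
+ //     if (Mask[i]) BasePtr[i] = Value[i];
+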
+ // Masked gather and scatter - load and store operations for a vector of
+ // random addresses with additional mask operand that prevents memory
+ // accesses to the masked-off lanes.
+ //
+ // Val, OutChain = GATHER(InChain, PassThru, Mask, BasePtr, Index, Scale)
+ // OutChain = SCATTER(InChain, Value, Mask, BasePtr, Index, Scale)
+ //
+ // The Index operand can have more vector elements than the other operands
+ // due to type legalization. The extra elements are ignored.
+ MGATHER,
+ MSCATTER,
+
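+ // Illustrative only: per-lane GATHER addressing as a scalar loop (how
+ // Index is interpreted is governed by MemIndexType, defined further down):
+ //   for (unsigned i = 0; i != NumElts; ++i)
+ //     Val[i] = Mask[i] ? *(ElemTy *)(Base + Index[i] * Scale)
+ //                      : PassThru[i];
+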
+ /// This corresponds to the llvm.lifetime.* intrinsics. The first operand
+ /// is the chain and the second operand is the alloca pointer.
+ LIFETIME_START,
+ LIFETIME_END,
+
+ /// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
+ /// beginning and end of GC transition sequence, and carry arbitrary
+ /// information that target might need for lowering. The first operand is
+ /// a chain, the rest are specified by the target and not touched by the DAG
+ /// optimizers. GC_TRANSITION_START..GC_TRANSITION_END pairs may not be
+ /// nested.
+ GC_TRANSITION_START,
+ GC_TRANSITION_END,
+
+ /// GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of
+ /// the most recent dynamic alloca. For most targets that would be 0, but
+ /// for some others (e.g. PowerPC, PowerPC64) that would be compile-time
+ /// known nonzero constant. The only operand here is the chain.
+ GET_DYNAMIC_AREA_OFFSET,
+
+ /// Pseudo probe for AutoFDO, used as a placeholder in a basic block to
+ /// improve the quality of sample counts.
+ PSEUDO_PROBE,
+
+ /// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the
+ /// number of elements within a scalable vector. IMM is a constant integer
+ /// multiplier that is applied to the runtime value.
+ VSCALE,
+
+ /// Generic reduction nodes. These nodes represent horizontal vector
+ /// reduction operations, producing a scalar result.
+ /// The SEQ variants perform reductions in sequential order. The first
+ /// operand is an initial scalar accumulator value, and the second operand
+ /// is the vector to reduce.
+ /// E.g. RES = VECREDUCE_SEQ_FADD f32 ACC, <4 x f32> SRC_VEC
+ /// ... is equivalent to
+ /// RES = (((ACC + SRC_VEC[0]) + SRC_VEC[1]) + SRC_VEC[2]) + SRC_VEC[3]
+ VECREDUCE_SEQ_FADD,
+ VECREDUCE_SEQ_FMUL,
+
+ /// These reductions have relaxed evaluation order semantics, and have a
+ /// single vector operand. The order of evaluation is unspecified. For
+ /// pow-of-2 vectors, one valid legalizer expansion is to use a tree
+ /// reduction, i.e.:
+ /// For RES = VECREDUCE_FADD <8 x f16> SRC_VEC
+ /// PART_RDX = FADD SRC_VEC[0:3], SRC_VEC[4:7]
+ /// PART_RDX2 = FADD PART_RDX[0:1], PART_RDX[2:3]
+ /// RES = FADD PART_RDX2[0], PART_RDX2[1]
+ /// For non-pow-2 vectors, this can be computed by extracting each element
+ /// and performing the operation as if it were scalarized.
+ VECREDUCE_FADD,
+ VECREDUCE_FMUL,
+ /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
+ VECREDUCE_FMAX,
+ VECREDUCE_FMIN,
+ /// Integer reductions may have a result type larger than the vector element
+ /// type. However, the reduction is performed using the vector element type
+ /// and the value in the top bits is unspecified.
+ VECREDUCE_ADD,
+ VECREDUCE_MUL,
+ VECREDUCE_AND,
+ VECREDUCE_OR,
+ VECREDUCE_XOR,
+ VECREDUCE_SMAX,
+ VECREDUCE_SMIN,
+ VECREDUCE_UMAX,
+ VECREDUCE_UMIN,
+
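+ // Illustrative only: the two evaluation orders described above, as scalar
+ // sketches for FADD (Acc, Vec and N are assumption names):
+ //   float R = Acc;                                 // VECREDUCE_SEQ_FADD:
+ //   for (unsigned i = 0; i != N; ++i) R += Vec[i]; // strict in-order.
+ //   // VECREDUCE_FADD: any association is allowed, e.g. a pairwise tree.
+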
+// Vector Predication
+#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID,
+#include "llvm/IR/VPIntrinsics.def"
+
+ /// BUILTIN_OP_END - This must be the last enum value in this list.
+ /// The target-specific pre-isel opcode values start here.
+ BUILTIN_OP_END
+};
+
+/// FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations
+/// which cannot raise FP exceptions should be less than this value.
+/// Those that do must not be less than this value.
+static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400;
+
+/// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations
+/// which do not reference a specific memory location should be less than
+/// this value. Those that do must not be less than this value, and can
+/// be used with SelectionDAG::getMemIntrinsicNode.
+static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;
+
+/// Get underlying scalar opcode for VECREDUCE opcode.
+/// For example ISD::AND for ISD::VECREDUCE_AND.
+NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
+
+/// Whether this is a vector-predicated Opcode.
+bool isVPOpcode(unsigned Opcode);
+
+/// The operand position of the vector mask.
+Optional<unsigned> getVPMaskIdx(unsigned Opcode);
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode);
+
+//===--------------------------------------------------------------------===//
+/// MemIndexedMode enum - This enum defines the load / store indexed
+/// addressing modes.
+///
+/// UNINDEXED "Normal" load / store. The effective address is already
+/// computed and is available in the base pointer. The offset
+/// operand is always undefined. In addition to producing a
+/// chain, an unindexed load produces one value (result of the
+/// load); an unindexed store does not produce a value.
+///
+/// PRE_INC Similar to the unindexed mode where the effective address is
+/// PRE_DEC the value of the base pointer add / subtract the offset.
+/// It considers the computation as being folded into the load /
+/// store operation (i.e. the load / store does the address
+/// computation as well as performing the memory transaction).
+/// The base operand is always undefined. In addition to
+/// producing a chain, pre-indexed load produces two values
+/// (result of the load and the result of the address
+/// computation); a pre-indexed store produces one value (result
+/// of the address computation).
+///
+/// POST_INC The effective address is the value of the base pointer. The
+/// POST_DEC value of the offset operand is then added to / subtracted
+/// from the base after memory transaction. In addition to
+/// producing a chain, post-indexed load produces two values
+/// (the result of the load and the result of the base +/- offset
+/// computation); a post-indexed store produces one value (the
+/// result of the base +/- offset computation).
+enum MemIndexedMode { UNINDEXED = 0, PRE_INC, PRE_DEC, POST_INC, POST_DEC };
+
+static const int LAST_INDEXED_MODE = POST_DEC + 1;
+
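+// Illustrative only: the effective address (EA) and the produced base value
+// per mode, as a scalar sketch (Base and Offset are assumption names):
+//   PRE_INC:  EA = Base + Offset; access *EA; base result = Base + Offset
+//   POST_INC: EA = Base;          access *EA; base result = Base + Offset
+// (PRE_DEC and POST_DEC subtract the offset instead.)
+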
+//===--------------------------------------------------------------------===//
+/// MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's
+/// index parameter when calculating addresses.
+///
+/// SIGNED_SCALED Addr = Base + ((signed)Index * sizeof(element))
+/// SIGNED_UNSCALED Addr = Base + (signed)Index
+/// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * sizeof(element))
+/// UNSIGNED_UNSCALED Addr = Base + (unsigned)Index
+enum MemIndexType {
+ SIGNED_SCALED = 0,
+ SIGNED_UNSCALED,
+ UNSIGNED_SCALED,
+ UNSIGNED_UNSCALED
+};
+
+static const int LAST_MEM_INDEX_TYPE = UNSIGNED_UNSCALED + 1;
+
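+// Illustrative only: the four formulas above folded into one scalar helper
+// (Signed/Scaled/ElemTy are assumption names, not upstream API):
+//   uint64_t EA = Base + (Signed ? (int64_t)Index : (uint64_t)Index) *
+//                        (Scaled ? sizeof(ElemTy) : 1);
+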
+//===--------------------------------------------------------------------===//
+/// LoadExtType enum - This enum defines the three variants of LOADEXT
+/// (load with extension).
+///
+/// SEXTLOAD loads the integer operand and sign extends it to a larger
+/// integer result type.
+/// ZEXTLOAD loads the integer operand and zero extends it to a larger
+/// integer result type.
+/// EXTLOAD is used for two things: floating point extending loads and
+/// integer extending loads [the top bits are undefined].
+enum LoadExtType { NON_EXTLOAD = 0, EXTLOAD, SEXTLOAD, ZEXTLOAD };
+
+static const int LAST_LOADEXT_TYPE = ZEXTLOAD + 1;
+
+NodeType getExtForLoadExtType(bool IsFP, LoadExtType);
+
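+// Illustrative only: the three extending variants for an i8 load widened to
+// i32, in scalar form:
+//   ZEXTLOAD: Res = (uint32_t)(uint8_t)Mem8;  // Top bits zeroed.
+//   SEXTLOAD: Res = (int32_t)(int8_t)Mem8;    // Top bits sign-copied.
+//   EXTLOAD:  low 8 bits from Mem8, top 24 bits undefined.
+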
+//===--------------------------------------------------------------------===//
+/// ISD::CondCode enum - These are ordered carefully to make the bitfields
+/// below work out, when considering SETFALSE (something that never exists
+/// dynamically) as 0. "U" -> Unsigned (for integer operands) or Unordered
+/// (for floating point), "L" -> Less than, "G" -> Greater than, "E" -> Equal
+/// to. If the "N" column is 1, the result of the comparison is undefined if
+/// the input is a NAN.
+///
+/// All of these (except for the 'always folded ops') should be handled for
+/// floating point. For integer, only the SETEQ,SETNE,SETLT,SETLE,SETGT,
+/// SETGE,SETULT,SETULE,SETUGT, and SETUGE opcodes are used.
+///
+/// Note that these are laid out in a specific order to allow bit-twiddling
+/// to transform conditions.
+enum CondCode {
+ // Opcode N U L G E Intuitive operation
+ SETFALSE, // 0 0 0 0 Always false (always folded)
+ SETOEQ, // 0 0 0 1 True if ordered and equal
+ SETOGT, // 0 0 1 0 True if ordered and greater than
+ SETOGE, // 0 0 1 1 True if ordered and greater than or equal
+ SETOLT, // 0 1 0 0 True if ordered and less than
+ SETOLE, // 0 1 0 1 True if ordered and less than or equal
+ SETONE, // 0 1 1 0 True if ordered and operands are unequal
+ SETO, // 0 1 1 1 True if ordered (no nans)
+ SETUO, // 1 0 0 0 True if unordered: isnan(X) | isnan(Y)
+ SETUEQ, // 1 0 0 1 True if unordered or equal
+ SETUGT, // 1 0 1 0 True if unordered or greater than
+ SETUGE, // 1 0 1 1 True if unordered, greater than, or equal
+ SETULT, // 1 1 0 0 True if unordered or less than
+ SETULE, // 1 1 0 1 True if unordered, less than, or equal
+ SETUNE, // 1 1 1 0 True if unordered or not equal
+ SETTRUE, // 1 1 1 1 Always true (always folded)
+ // Don't care operations: undefined if the input is a nan.
+ SETFALSE2, // 1 X 0 0 0 Always false (always folded)
+ SETEQ, // 1 X 0 0 1 True if equal
+ SETGT, // 1 X 0 1 0 True if greater than
+ SETGE, // 1 X 0 1 1 True if greater than or equal
+ SETLT, // 1 X 1 0 0 True if less than
+ SETLE, // 1 X 1 0 1 True if less than or equal
+ SETNE, // 1 X 1 1 0 True if not equal
+ SETTRUE2, // 1 X 1 1 1 Always true (always folded)
+
+ SETCC_INVALID // Marker value.
+};
+
+/// Return true if this is a setcc instruction that performs a signed
+/// comparison when used with integer operands.
+inline bool isSignedIntSetCC(CondCode Code) {
+ return Code == SETGT || Code == SETGE || Code == SETLT || Code == SETLE;
+}
+
+/// Return true if this is a setcc instruction that performs an unsigned
+/// comparison when used with integer operands.
+inline bool isUnsignedIntSetCC(CondCode Code) {
+ return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE;
+}
+
+/// Return true if the specified condition returns true if the two operands to
+/// the condition are equal. Note that if one of the two operands is a NaN,
+/// this value is meaningless.
+inline bool isTrueWhenEqual(CondCode Cond) { return ((int)Cond & 1) != 0; }
+
+/// This function returns 0 if the condition is always false if an operand is
+/// a NaN, 1 if the condition is always true if the operand is a NaN, and 2 if
+/// the condition is undefined if the operand is a NaN.
+inline unsigned getUnorderedFlavor(CondCode Cond) {
+ return ((int)Cond >> 3) & 3;
+}
+
+/// Return the operation corresponding to !(X op Y), where 'op' is a valid
+/// SetCC operation.
+CondCode getSetCCInverse(CondCode Operation, EVT Type);
+
+namespace GlobalISel {
+/// Return the operation corresponding to !(X op Y), where 'op' is a valid
+/// SetCC operation. The U bit of the condition code has different meanings
+/// between floating point and integer comparisons and LLT's don't provide
+/// this distinction. As such we need to be told whether the comparison is
+/// floating point or integer-like. Pointers should use integer-like
+/// comparisons.
+CondCode getSetCCInverse(CondCode Operation, bool isIntegerLike);
+} // end namespace GlobalISel
+
+/// Return the operation corresponding to (Y op X) when given the operation
+/// for (X op Y).
+CondCode getSetCCSwappedOperands(CondCode Operation);
+
+/// Return the result of a logical OR between different comparisons of
+/// identical values: ((X op1 Y) | (X op2 Y)). This function returns
+/// SETCC_INVALID if it is not possible to represent the resultant comparison.
+CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type);
+
+/// Return the result of a logical AND between different comparisons of
+/// identical values: ((X op1 Y) & (X op2 Y)). This function returns
+/// SETCC_INVALID if it is not possible to represent the resultant comparison.
+CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type);
+
+} // namespace ISD
+
+} // namespace llvm
#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/IndirectThunks.h b/linux-x64/clang/include/llvm/CodeGen/IndirectThunks.h
new file mode 100644
index 0000000..810acc0
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/IndirectThunks.h
@@ -0,0 +1,110 @@
+//===---- IndirectThunks.h - Indirect Thunk Base Class ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Contains a base class for Passes that inject an MI thunk.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INDIRECTTHUNKS_H
+#define LLVM_INDIRECTTHUNKS_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+
+template <typename Derived> class ThunkInserter {
+ Derived &getDerived() { return *static_cast<Derived *>(this); }
+
+protected:
+ bool InsertedThunks;
+ void doInitialization(Module &M) {}
+ void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
+
+public:
+ void init(Module &M) {
+ InsertedThunks = false;
+ getDerived().doInitialization(M);
+ }
+ // Returns `true` if `MMI` or `MF` was modified.
+ bool run(MachineModuleInfo &MMI, MachineFunction &MF);
+};
+
+template <typename Derived>
+void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
+ StringRef Name) {
+ assert(Name.startswith(getDerived().getThunkPrefix()) &&
+ "Created a thunk with an unexpected prefix!");
+
+ Module &M = const_cast<Module &>(*MMI.getModule());
+ LLVMContext &Ctx = M.getContext();
+ auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+ Function *F =
+ Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
+ F->setVisibility(GlobalValue::HiddenVisibility);
+ F->setComdat(M.getOrInsertComdat(Name));
+
+ // Add Attributes so that we don't create a frame, unwind information, or
+ // inline.
+ AttrBuilder B;
+ B.addAttribute(llvm::Attribute::NoUnwind);
+ B.addAttribute(llvm::Attribute::Naked);
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+ // Populate our function a bit so that we can verify.
+ BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+ IRBuilder<> Builder(Entry);
+
+ Builder.CreateRetVoid();
+
+ // MachineFunctions aren't created automatically for the IR-level constructs
+ // we already made. Create them and insert them into the module.
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ // A MachineBasicBlock must not be created for the Entry block; code
+ // generation from an empty naked function in C source code also does not
+ // generate one. At least GlobalISel asserts if this invariant isn't
+ // respected.
+
+ // Set MF properties. We never use vregs...
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+template <typename Derived>
+bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
+ // If MF is not a thunk, check to see if we need to insert a thunk.
+ if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
+ // If we've already inserted a thunk, nothing else to do.
+ if (InsertedThunks)
+ return false;
+
+ // Only add a thunk if one of the functions has the corresponding feature
+ // enabled in its subtarget, and doesn't enable external thunks.
+ // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
+ // nothing will end up calling it.
+ // FIXME: It's a little silly to look at every function just to enumerate
+ // the subtargets, but eventually we'll want to look at them for indirect
+ // calls, so maybe this is OK.
+ if (!getDerived().mayUseThunk(MF))
+ return false;
+
+ getDerived().insertThunks(MMI);
+ InsertedThunks = true;
+ return true;
+ }
+
+ // If this *is* a thunk function, we need to populate it with the correct MI.
+ getDerived().populateThunk(MF);
+ return true;
+}
+
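+// Illustrative only: a hypothetical Derived showing the members the CRTP
+// base calls above (getThunkPrefix, mayUseThunk, insertThunks,
+// populateThunk); a sketch, not a real target pass:
+//
+//   struct MyThunkInserter : ThunkInserter<MyThunkInserter> {
+//     StringRef getThunkPrefix() { return "__my_thunk_"; }
+//     bool mayUseThunk(const MachineFunction &MF) { /* subtarget check */ }
+//     void insertThunks(MachineModuleInfo &MMI) {
+//       createThunkFunction(MMI, "__my_thunk_r11"); // Name is made up.
+//     }
+//     void populateThunk(MachineFunction &MF) { /* emit the thunk body */ }
+//   };
+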
+} // namespace llvm
+
+#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/IntrinsicLowering.h b/linux-x64/clang/include/llvm/CodeGen/IntrinsicLowering.h
index daf2d9a..8593f54 100644
--- a/linux-x64/clang/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/linux-x64/clang/include/llvm/CodeGen/IntrinsicLowering.h
@@ -19,7 +19,6 @@
namespace llvm {
class CallInst;
-class Module;
class DataLayout;
class IntrinsicLowering {
diff --git a/linux-x64/clang/include/llvm/CodeGen/LexicalScopes.h b/linux-x64/clang/include/llvm/CodeGen/LexicalScopes.h
index 253d473..9617ba8 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LexicalScopes.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LexicalScopes.h
@@ -163,8 +163,8 @@
void getMachineBasicBlocks(const DILocation *DL,
SmallPtrSetImpl<const MachineBasicBlock *> &MBBs);
- /// dominates - Return true if DebugLoc's lexical scope dominates at least one
- /// machine instruction's lexical scope in a given machine basic block.
+ /// Return true if DebugLoc's lexical scope dominates at least one machine
+ /// instruction's lexical scope in a given machine basic block.
bool dominates(const DILocation *DL, MachineBasicBlock *MBB);
/// findLexicalScope - Find lexical scope, either regular or inlined, for the
@@ -194,9 +194,6 @@
return I != LexicalScopeMap.end() ? &I->second : nullptr;
}
- /// dump - Print data structures to dbgs().
- void dump() const;
-
/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
LexicalScope *getOrCreateAbstractScope(const DILocalScope *Scope);
@@ -250,6 +247,11 @@
/// CurrentFnLexicalScope - Top level scope for the current function.
///
LexicalScope *CurrentFnLexicalScope = nullptr;
+
+ /// Map a location to the set of basic blocks it dominates. This is a cache
+ /// for \ref LexicalScopes::getMachineBasicBlocks results.
+ using BlockSetT = SmallPtrSet<const MachineBasicBlock *, 4>;
+ DenseMap<const DILocation *, std::unique_ptr<BlockSetT>> DominatedBlocks;
};
} // end namespace llvm
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveInterval.h b/linux-x64/clang/include/llvm/CodeGen/LiveInterval.h
index 8bb8816..c2b158a 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveInterval.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveInterval.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
@@ -189,6 +190,10 @@
return start == Other.start && end == Other.end;
}
+ bool operator!=(const Segment &Other) const {
+ return !(*this == Other);
+ }
+
void dump() const;
};
@@ -224,7 +229,7 @@
/// Constructs a new LiveRange object.
LiveRange(bool UseSegmentSet = false)
- : segmentSet(UseSegmentSet ? llvm::make_unique<SegmentSet>()
+ : segmentSet(UseSegmentSet ? std::make_unique<SegmentSet>()
: nullptr) {}
/// Constructs a new LiveRange object by copying segments and valnos from
@@ -593,10 +598,9 @@
/// @p End.
bool isUndefIn(ArrayRef<SlotIndex> Undefs, SlotIndex Begin,
SlotIndex End) const {
- return std::any_of(Undefs.begin(), Undefs.end(),
- [Begin,End] (SlotIndex Idx) -> bool {
- return Begin <= Idx && Idx < End;
- });
+ return llvm::any_of(Undefs, [Begin, End](SlotIndex Idx) -> bool {
+ return Begin <= Idx && Idx < End;
+ });
}
/// Flush segment set into the regular segment vector.
@@ -613,7 +617,7 @@
/// subranges). Returns true if found at least one index.
template <typename Range, typename OutputIt>
bool findIndexesLiveAt(Range &&R, OutputIt O) const {
- assert(std::is_sorted(R.begin(), R.end()));
+ assert(llvm::is_sorted(R));
auto Idx = R.begin(), EndIdx = R.end();
auto Seg = segments.begin(), EndSeg = segments.end();
bool Found = false;
@@ -621,11 +625,12 @@
// if the Seg is lower find first segment that is above Idx using binary
// search
if (Seg->end <= *Idx) {
- Seg = std::upper_bound(++Seg, EndSeg, *Idx,
- [=](typename std::remove_reference<decltype(*Idx)>::type V,
- const typename std::remove_reference<decltype(*Seg)>::type &S) {
- return V < S.end;
- });
+ Seg = std::upper_bound(
+ ++Seg, EndSeg, *Idx,
+ [=](std::remove_reference_t<decltype(*Idx)> V,
+ const std::remove_reference_t<decltype(*Seg)> &S) {
+ return V < S.end;
+ });
if (Seg == EndSeg)
break;
}
@@ -699,12 +704,16 @@
private:
SubRange *SubRanges = nullptr; ///< Single linked list of subregister live
/// ranges.
+ const Register Reg; // the register or stack slot of this interval.
+ float Weight = 0.0; // weight of this interval
public:
- const unsigned reg; // the register or stack slot of this interval.
- float weight; // weight of this interval
+ Register reg() const { return Reg; }
+ float weight() const { return Weight; }
+ void incrementWeight(float Inc) { Weight += Inc; }
+ void setWeight(float Value) { Weight = Value; }
- LiveInterval(unsigned Reg, float Weight) : reg(Reg), weight(Weight) {}
+ LiveInterval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {}
~LiveInterval() {
clearSubRanges();
@@ -726,10 +735,10 @@
++*this;
return res;
}
- bool operator!=(const SingleLinkedListIterator<T> &Other) {
+ bool operator!=(const SingleLinkedListIterator<T> &Other) const {
return P != Other.operator->();
}
- bool operator==(const SingleLinkedListIterator<T> &Other) {
+ bool operator==(const SingleLinkedListIterator<T> &Other) const {
return P == Other.operator->();
}
T &operator*() const {
@@ -801,14 +810,10 @@
unsigned getSize() const;
/// isSpillable - Can this interval be spilled?
- bool isSpillable() const {
- return weight != huge_valf;
- }
+ bool isSpillable() const { return Weight != huge_valf; }
/// markNotSpillable - Mark interval as not spillable
- void markNotSpillable() {
- weight = huge_valf;
- }
+ void markNotSpillable() { Weight = huge_valf; }
/// For a given lane mask @p LaneMask, compute indexes at which the
/// lane is marked undefined by subregister <def,read-undef> definitions.
@@ -829,18 +834,43 @@
/// function will be applied to the L0010 and L0008 subranges.
///
/// \p Indexes and \p TRI are required to clean up the VNIs that
- /// don't defne the related lane masks after they get shrunk. E.g.,
+ /// don't define the related lane masks after they get shrunk. E.g.,
/// when L000F gets split into L0007 and L0008 maybe only a subset
/// of the VNIs that defined L000F defines L0007.
+ ///
+ /// The cleanup of the VNIs needs to look at the actual instructions
+ /// to decide what is or is not live at a definition point. If the
+ /// update of the subranges occurs while the IR does not reflect these
+ /// changes, \p ComposeSubRegIdx can be used to specify how the
+ /// definitions are going to be rewritten.
+ /// E.g., let's say we want to merge:
+ /// V1.sub1:<2 x s32> = COPY V2.sub3:<4 x s32>
+ /// We do that by choosing a class where sub1:<2 x s32> and sub3:<4 x s32>
+ /// overlap, i.e., by choosing a class where we can find "offset + 1 == 3".
+ /// Put differently we align V2's sub3 with V1's sub1:
+ /// V2: sub0 sub1 sub2 sub3
+ /// V1: <offset> sub0 sub1
+ ///
+ /// This offset will look like a composed subregidx in the class:
+ /// V1.(composed sub2 with sub1):<4 x s32> = COPY V2.sub3:<4 x s32>
+ /// => V1.(composed sub2 with sub1):<4 x s32> = COPY V2.sub3:<4 x s32>
+ ///
+ /// Now if we didn't rewrite the uses and def of V1, all the checks for V1
+ /// need to account for this offset.
+ /// This happens during coalescing where we update the live-ranges while
+ /// still having the old IR around because updating the IR on-the-fly
+ /// would actually clobber some information about what the live ranges
+ /// being updated look like.
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
std::function<void(LiveInterval::SubRange &)> Apply,
const SlotIndexes &Indexes,
- const TargetRegisterInfo &TRI);
+ const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx = 0);
bool operator<(const LiveInterval& other) const {
const SlotIndex &thisIndex = beginIndex();
const SlotIndex &otherIndex = other.beginIndex();
- return std::tie(thisIndex, reg) < std::tie(otherIndex, other.reg);
+ return std::tie(thisIndex, Reg) < std::tie(otherIndex, other.Reg);
}
void print(raw_ostream &OS) const;
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveIntervalCalc.h b/linux-x64/clang/include/llvm/CodeGen/LiveIntervalCalc.h
new file mode 100644
index 0000000..76005e8
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveIntervalCalc.h
@@ -0,0 +1,71 @@
+//===- LiveIntervalCalc.h - Calculate live intervals -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveIntervalCalc class is an extension of LiveRangeCalc targeted to the
+// computation and modification of the LiveInterval variants of LiveRanges.
+// LiveIntervals are meant to track liveness of registers and stack slots and
+// LiveIntervalCalc adds to LiveRangeCalc all the machinery required to
+// construct the liveness of virtual registers tracked by a LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEINTERVALCALC_H
+#define LLVM_LIB_CODEGEN_LIVEINTERVALCALC_H
+
+#include "llvm/CodeGen/LiveRangeCalc.h"
+
+namespace llvm {
+
+template <class NodeT> class DomTreeNodeBase;
+
+using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
+
+class LiveIntervalCalc : public LiveRangeCalc {
+ /// Extend the live range of @p LR to reach all uses of Reg.
+ ///
+ /// If @p LR is a main range, or if @p LI is null, then all uses must be
+ /// jointly dominated by the definitions from @p LR. If @p LR is a subrange
+ /// of the live interval @p LI, corresponding to lane mask @p LaneMask,
+ /// all uses must be jointly dominated by the definitions from @p LR
+ /// together with definitions of other lanes where @p LR becomes undefined
+ /// (via <def,read-undef> operands).
+ /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e.
+ /// LaneBitmask::getAll().
+ void extendToUses(LiveRange &LR, Register Reg, LaneBitmask LaneMask,
+ LiveInterval *LI = nullptr);
+
+public:
+ LiveIntervalCalc() = default;
+
+ /// createDeadDefs - Create a dead def in LI for every def operand of Reg.
+ /// Each instruction defining Reg gets a new VNInfo with a corresponding
+ /// minimal live range.
+ void createDeadDefs(LiveRange &LR, Register Reg);
+
+ /// Extend the live range of @p LR to reach all uses of Reg.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveRange &LR, MCRegister PhysReg) {
+ extendToUses(LR, PhysReg, LaneBitmask::getAll());
+ }
+
+ /// Calculates liveness for the register specified in live interval @p LI.
+ /// Creates subregister live ranges as needed if subreg liveness tracking is
+ /// enabled.
+ void calculate(LiveInterval &LI, bool TrackSubRegs);
+
+ /// For live interval \p LI with correct SubRanges construct matching
+ /// information for the main live range. Expects the main live range to not
+ /// have any segments or value numbers.
+ void constructMainRangeFromSubranges(LiveInterval &LI);
+};
+
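+// Illustrative usage sketch (not part of the upstream header); reset() is
+// inherited from LiveRangeCalc, and the variable names are assumptions:
+//
+//   LiveIntervalCalc LICalc;
+//   LICalc.reset(&MF, Indexes, DomTree, &VNIAlloc);
+//   LICalc.calculate(LI, /*TrackSubRegs=*/true);
+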
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_LIVEINTERVALCALC_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveIntervalUnion.h b/linux-x64/clang/include/llvm/CodeGen/LiveIntervalUnion.h
index 05506d2..ad9e06d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -75,6 +75,7 @@
bool empty() const { return Segments.empty(); }
SlotIndex startIndex() const { return Segments.start(); }
+ SlotIndex endIndex() const { return Segments.stop(); }
// Provide public access to the underlying map to allow overlap iteration.
using Map = LiveSegments;
@@ -103,6 +104,9 @@
void verify(LiveVirtRegBitSet& VisitedVRegs);
#endif
+ // Get any virtual register that is assigned to this physical unit.
+ LiveInterval *getOneVReg() const;
+
/// Query interferences between a single live virtual register and a live
/// interval union.
class Query {
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveIntervals.h b/linux-x64/clang/include/llvm/CodeGen/LiveIntervals.h
index 588b0f9..fa08166 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveIntervals.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveIntervals.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -40,8 +39,9 @@
extern cl::opt<bool> UseSegmentSetForPhysRegs;
+class AAResults;
class BitVector;
-class LiveRangeCalc;
+class LiveIntervalCalc;
class MachineBlockFrequencyInfo;
class MachineDominatorTree;
class MachineFunction;
@@ -56,10 +56,10 @@
MachineRegisterInfo* MRI;
const TargetRegisterInfo* TRI;
const TargetInstrInfo* TII;
- AliasAnalysis *AA;
+ AAResults *AA;
SlotIndexes* Indexes;
MachineDominatorTree *DomTree = nullptr;
- LiveRangeCalc *LRCalc = nullptr;
+ LiveIntervalCalc *LICalc = nullptr;
/// Special pool allocator for VNInfo's (LiveInterval val#).
VNInfo::Allocator VNInfoAllocator;
@@ -111,44 +111,45 @@
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB);
- LiveInterval &getInterval(unsigned Reg) {
+ LiveInterval &getInterval(Register Reg) {
if (hasInterval(Reg))
- return *VirtRegIntervals[Reg];
- else
- return createAndComputeVirtRegInterval(Reg);
+ return *VirtRegIntervals[Reg.id()];
+
+ return createAndComputeVirtRegInterval(Reg);
}
- const LiveInterval &getInterval(unsigned Reg) const {
+ const LiveInterval &getInterval(Register Reg) const {
return const_cast<LiveIntervals*>(this)->getInterval(Reg);
}
- bool hasInterval(unsigned Reg) const {
- return VirtRegIntervals.inBounds(Reg) && VirtRegIntervals[Reg];
+ bool hasInterval(Register Reg) const {
+ return VirtRegIntervals.inBounds(Reg.id()) &&
+ VirtRegIntervals[Reg.id()];
}
/// Interval creation.
- LiveInterval &createEmptyInterval(unsigned Reg) {
+ LiveInterval &createEmptyInterval(Register Reg) {
assert(!hasInterval(Reg) && "Interval already exists!");
- VirtRegIntervals.grow(Reg);
- VirtRegIntervals[Reg] = createInterval(Reg);
- return *VirtRegIntervals[Reg];
+ VirtRegIntervals.grow(Reg.id());
+ VirtRegIntervals[Reg.id()] = createInterval(Reg);
+ return *VirtRegIntervals[Reg.id()];
}
- LiveInterval &createAndComputeVirtRegInterval(unsigned Reg) {
+ LiveInterval &createAndComputeVirtRegInterval(Register Reg) {
LiveInterval &LI = createEmptyInterval(Reg);
computeVirtRegInterval(LI);
return LI;
}
/// Interval removal.
- void removeInterval(unsigned Reg) {
+ void removeInterval(Register Reg) {
delete VirtRegIntervals[Reg];
VirtRegIntervals[Reg] = nullptr;
}
/// Given a register and an instruction, adds a live segment from that
/// instruction to the end of its MBB.
- LiveInterval::Segment addSegmentToEndOfBlock(unsigned reg,
+ LiveInterval::Segment addSegmentToEndOfBlock(Register Reg,
MachineInstr &startInst);
/// After removing some uses of a register, shrink its live range to just
@@ -166,7 +167,7 @@
/// the lane mask of the subregister range.
/// This may leave the subrange empty which needs to be cleaned up with
/// LiveInterval::removeEmptySubranges() afterwards.
- void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg);
+ void shrinkToUses(LiveInterval::SubRange &SR, Register Reg);
/// Extend the live range \p LR to reach all points in \p Indices. The
/// points in the \p Indices array must be jointly dominated by the union
@@ -211,7 +212,7 @@
return Indexes;
}
- AliasAnalysis *getAliasAnalysis() const {
+ AAResults *getAliasAnalysis() const {
return AA;
}
@@ -309,16 +310,16 @@
/// \param UpdateFlags Update live intervals for nonallocatable physregs.
void handleMove(MachineInstr &MI, bool UpdateFlags = false);
- /// Update intervals for operands of \p MI so that they begin/end on the
- /// SlotIndex for \p BundleStart.
+ /// Update intervals of operands of all instructions in the newly
+ /// created bundle specified by \p BundleStart.
///
/// \param UpdateFlags Update live intervals for nonallocatable physregs.
///
- /// Requires MI and BundleStart to have SlotIndexes, and assumes
- /// existing liveness is accurate. BundleStart should be the first
- /// instruction in the Bundle.
- void handleMoveIntoBundle(MachineInstr &MI, MachineInstr &BundleStart,
- bool UpdateFlags = false);
+ /// Assumes existing liveness is accurate.
+ /// \pre BundleStart should be the first instruction in the Bundle.
+ /// \pre BundleStart should not have a SlotIndex, as one will be assigned.
+ void handleMoveIntoNewBundle(MachineInstr &BundleStart,
+ bool UpdateFlags = false);
/// Update live intervals for instructions in a range of iterators. It is
/// intended for use after target hooks that may insert or remove
@@ -332,7 +333,7 @@
void repairIntervalsInRange(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
- ArrayRef<unsigned> OrigRegs);
+ ArrayRef<Register> OrigRegs);
// Register mask functions.
//
@@ -421,7 +422,7 @@
/// Reg. Subsequent uses should rely on on-demand recomputation. \note This
/// method can result in inconsistent liveness tracking if multiple physical
/// registers share a regunit, and should be used cautiously.
- void removeAllRegUnitsForPhysReg(unsigned Reg) {
+ void removeAllRegUnitsForPhysReg(MCRegister Reg) {
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
removeRegUnit(*Units);
}
@@ -429,7 +430,7 @@
/// Remove value numbers and related live segments starting at position
/// \p Pos that are part of any liverange of physical register \p Reg or one
/// of its subregisters.
- void removePhysRegDefAt(unsigned Reg, SlotIndex Pos);
+ void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos);
/// Remove value number and related live segments of \p LI and its subranges
/// that start at position \p Pos.
@@ -461,18 +462,18 @@
bool computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead);
- static LiveInterval* createInterval(unsigned Reg);
+ static LiveInterval *createInterval(Register Reg);
void printInstrs(raw_ostream &O) const;
void dumpInstrs() const;
void computeLiveInRegUnits();
void computeRegUnitRange(LiveRange&, unsigned Unit);
- void computeVirtRegInterval(LiveInterval&);
+ bool computeVirtRegInterval(LiveInterval&);
using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>;
void extendSegmentsToUses(LiveRange &Segments,
- ShrinkToUsesWorkList &WorkList, unsigned Reg,
+ ShrinkToUsesWorkList &WorkList, Register Reg,
LaneBitmask LaneMask);
/// Helper function for repairIntervalsInRange(), walks backwards and
@@ -482,7 +483,7 @@
void repairOldRegInRange(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
const SlotIndex endIdx, LiveRange &LR,
- unsigned Reg,
+ Register Reg,
LaneBitmask LaneMask = LaneBitmask::getAll());
class HMEditor;
diff --git a/linux-x64/clang/include/llvm/CodeGen/LivePhysRegs.h b/linux-x64/clang/include/llvm/CodeGen/LivePhysRegs.h
index 50da0b3..0858934 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LivePhysRegs.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LivePhysRegs.h
@@ -137,6 +137,9 @@
/// Live out registers are the union of the live-in registers of the successor
/// blocks and pristine registers. Live out registers of the end block are the
/// callee saved registers.
+ /// If a register is not added by this method, it is guaranteed to not be
+ /// live out from MBB, although a sub-register may be. This is true
+ /// both before and after regalloc.
void addLiveOuts(const MachineBasicBlock &MBB);
/// Adds all live-out registers of basic block \p MBB but skips pristine
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveRangeCalc.h b/linux-x64/clang/include/llvm/CodeGen/LiveRangeCalc.h
new file mode 100644
index 0000000..bbb6f2d
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveRangeCalc.h
@@ -0,0 +1,270 @@
+//===- LiveRangeCalc.h - Calculate live ranges -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeCalc class can be used to implement the computation of
+// live ranges from scratch.
+// It caches information about values in the CFG to speed up repeated
+// operations on the same live range. The cache can be shared by
+// non-overlapping live ranges. SplitKit uses that when computing the live
+// range of split products.
+//
+// A low-level interface is available to clients that know where a variable is
+// live, but don't know which value it has at every point. LiveRangeCalc will
+// propagate values down the dominator tree, and even insert PHI-defs where
+// needed. SplitKit uses this faster interface when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
+#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include <utility>
+
+namespace llvm {
+
+template <class NodeT> class DomTreeNodeBase;
+class MachineDominatorTree;
+class MachineFunction;
+class MachineRegisterInfo;
+
+using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
+
+class LiveRangeCalc {
+ const MachineFunction *MF = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ SlotIndexes *Indexes = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ VNInfo::Allocator *Alloc = nullptr;
+
+ /// LiveOutPair - A value and the block that defined it. The domtree node is
+ /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
+ using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>;
+
+ /// LiveOutMap - Map basic blocks to the value leaving the block.
+ using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>;
+
+ /// Bit vector of active entries in LiveOut, also used as a visited set by
+ /// findReachingDefs. One entry per basic block, indexed by block number.
+ /// This is kept as a separate bit vector because it can be cleared quickly
+ /// when switching live ranges.
+ BitVector Seen;
+
+ /// Map LiveRange to sets of blocks (represented by bit vectors) that
+ /// in the live range are defined on entry and undefined on entry.
+ /// A block is defined on entry if there is a path from at least one of
+ /// the defs in the live range to the entry of the block, and conversely,
+ /// a block is undefined on entry, if there is no such path (i.e. no
+ /// definition reaches the entry of the block). A single LiveRangeCalc
+ /// object is used to track live-out information for multiple registers
+ /// in live range splitting (which is ok, since the live ranges of these
+ /// registers do not overlap), but the defined/undefined information must
+ /// be kept separate for each individual range.
+ /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
+ using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>;
+ EntryInfoMap EntryInfos;
+
+ /// Map each basic block where a live range is live out to the live-out value
+ /// and its defining block.
+ ///
+ /// For every basic block, MBB, one of these conditions shall be true:
+ ///
+ /// 1. !Seen.count(MBB->getNumber())
+ /// Blocks without a Seen bit are ignored.
+ /// 2. LiveOut[MBB].second.getNode() == MBB
+ /// The live-out value is defined in MBB.
+ /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
+ /// The live-out value passes through MBB. All predecessors must carry
+ /// the same value.
+ ///
+ /// The domtree node may be null, it can be computed.
+ ///
+ /// The map can be shared by multiple live ranges as long as no two are
+ /// live-out of the same block.
+ LiveOutMap Map;
+
+ /// LiveInBlock - Information about a basic block where a live range is known
+ /// to be live-in, but the value has not yet been determined.
+ struct LiveInBlock {
+ // The live range set that is live-in to this block. The algorithms can
+ // handle multiple non-overlapping live ranges simultaneously.
+ LiveRange &LR;
+
+ // DomNode - Dominator tree node for the block.
+ // Cleared when the final value has been determined and LI has been updated.
+ MachineDomTreeNode *DomNode;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block. When the final value has been
+ // determined, the range from the block start to Kill will be added to LI.
+ SlotIndex Kill;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value = nullptr;
+
+ LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
+ : LR(LR), DomNode(node), Kill(kill) {}
+ };
+
+ /// LiveIn - Work list of blocks where the live-in value has yet to be
+ /// determined. This list is typically computed by findReachingDefs() and
+ /// used as a work list by updateSSA(). The low-level interface may also be
+ /// used to add entries directly.
+ SmallVector<LiveInBlock, 16> LiveIn;
+
+ /// Check if the entry to block @p MBB can be reached by any of the defs
+ /// in @p LR. Return true if none of the defs reach the entry to @p MBB.
+ bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
+ MachineBasicBlock &MBB, BitVector &DefOnEntry,
+ BitVector &UndefOnEntry);
+
+ /// Find the set of defs that can reach @p Kill. @p Kill must belong to
+ /// @p UseMBB.
+ ///
+ /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill,
+ /// all paths from the def to @p UseMBB are added to @p LR, and the function
+ /// returns true.
+ ///
+ /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
+ /// live in are added to the LiveIn array, and the function returns false.
+ ///
+ /// The array @p Undefs provides the locations where the range @p LR becomes
+ /// undefined by <def,read-undef> operands on other subranges. If @p Undefs
+ /// is non-empty and @p Kill is jointly dominated only by the entries of
+ /// @p Undefs, the function returns false.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Use,
+ unsigned PhysReg, ArrayRef<SlotIndex> Undefs);
+
+ /// updateSSA - Compute the values that will be live in to all requested
+ /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
+ ///
+ /// Every live-in block must be jointly dominated by the added live-out
+ /// blocks. No values are read from the live ranges.
+ void updateSSA();
+
+ /// Transfer information from the LiveIn vector to the live ranges and update
+ /// the given @p LiveOuts.
+ void updateFromLiveIns();
+
+protected:
+ /// Read-only accessors that expose some private fields of this class to
+ /// subclasses.
+ const MachineFunction *getMachineFunction() { return MF; }
+ const MachineRegisterInfo *getRegInfo() const { return MRI; }
+ SlotIndexes *getIndexes() { return Indexes; }
+ MachineDominatorTree *getDomTree() { return DomTree; }
+ VNInfo::Allocator *getVNAlloc() { return Alloc; }
+
+ /// Reset Map and Seen fields.
+ void resetLiveOutMap();
+
+public:
+ LiveRangeCalc() = default;
+
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Calculate live ranges from scratch.
+ //
+
+ /// reset - Prepare caches for a new set of non-overlapping live ranges. The
+ /// caches must be reset before attempting calculations with a live range
+ /// that may overlap a previously computed live range, and before the first
+ /// live range in a function. If live ranges are not known to be
+ /// non-overlapping, call reset before each.
+ void reset(const MachineFunction *mf, SlotIndexes *SI,
+ MachineDominatorTree *MDT, VNInfo::Allocator *VNIA);
+
+ //===--------------------------------------------------------------------===//
+ // Mid-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Modify existing live ranges.
+ //
+
+ /// Extend the live range of @p LR to reach @p Use.
+ ///
+ /// The existing values in @p LR must be live so they jointly dominate @p Use.
+ /// If @p Use is not dominated by a single existing value, PHI-defs are
+ /// inserted as required to preserve SSA form.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs);
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // These functions can be used to compute live ranges where the live-in and
+ // live-out blocks are already known, but the SSA value in each block is
+ // unknown.
+ //
+ // After calling reset(), add known live-out values and known live-in blocks.
+ // Then call calculateValues() to compute the actual value that is
+ // live-in to each block, and add liveness to the live ranges.
+ //
+
+ /// setLiveOutValue - Indicate that VNI is live out from MBB. The
+ /// calculateValues() function will not add liveness for MBB, the caller
+ /// should take care of that.
+ ///
+ /// VNI may be null only if MBB is a live-through block also passed to
+ /// addLiveInBlock().
+ void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
+ Seen.set(MBB->getNumber());
+ Map[MBB] = LiveOutPair(VNI, nullptr);
+ }
+
+ /// addLiveInBlock - Add a block with an unknown live-in value. This
+ /// function can only be called once per basic block. Once the live-in value
+ /// has been determined, calculateValues() will add liveness to LI.
+ ///
+ /// @param LR The live range that is live-in to the block.
+ /// @param DomNode The domtree node for the block.
+ /// @param Kill Index in block where LI is killed. If the value is
+ /// live-through, set Kill = SlotIndex() and also call
+ /// setLiveOutValue(MBB, 0).
+ void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode,
+ SlotIndex Kill = SlotIndex()) {
+ LiveIn.push_back(LiveInBlock(LR, DomNode, Kill));
+ }
+
+ /// calculateValues - Calculate the value that will be live-in to each block
+ /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
+ /// form. Add liveness to all live-in blocks up to the Kill point, or the
+ /// whole block for live-through blocks.
+ ///
+ /// Every predecessor of a live-in block must have been given a value with
+ /// setLiveOutValue; the value may be null for live-through blocks.
+ void calculateValues();
+
+ /// A diagnostic function to check if the end of the block @p MBB is
+ /// jointly dominated by the blocks corresponding to the slot indices
+ /// in @p Defs. This function is mainly for use in self-verification
+ /// checks.
+ LLVM_ATTRIBUTE_UNUSED
+ static bool isJointlyDominated(const MachineBasicBlock *MBB,
+ ArrayRef<SlotIndex> Defs,
+ const SlotIndexes &Indexes);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H
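For illustration, a minimal sketch of driving the low-level interface above, assuming the pass already has a SlotIndexes analysis, a MachineDominatorTree, and a VNInfo allocator; PredA, PredB, JoinMBB, KillIdx, and the incoming VNInfo values are hypothetical:

// Sketch only: compute the live-in value of a join block whose predecessors
// already carry known live-out values. calculateValues()/updateSSA() insert
// a PHI-def if the two incoming values differ.
LiveRangeCalc Calc;
Calc.reset(&MF, Indexes, &MDT, &Alloc);

Calc.setLiveOutValue(PredA, VNIFromA); // value leaving PredA
Calc.setLiveOutValue(PredB, VNIFromB); // value leaving PredB

// JoinMBB's live-in value is unknown; liveness is added up to KillIdx.
Calc.addLiveInBlock(LR, MDT.getNode(JoinMBB), KillIdx);
Calc.calculateValues();
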
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveRangeEdit.h b/linux-x64/clang/include/llvm/CodeGen/LiveRangeEdit.h
index 6519937..87d48ad 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveRangeEdit.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +32,7 @@
namespace llvm {
+class AAResults;
class LiveIntervals;
class MachineBlockFrequencyInfo;
class MachineInstr;
@@ -56,19 +56,19 @@
/// Called when a virtual register is no longer used. Return false to defer
/// its deletion from LiveIntervals.
- virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+ virtual bool LRE_CanEraseVirtReg(Register) { return true; }
/// Called before shrinking the live range of a virtual register.
- virtual void LRE_WillShrinkVirtReg(unsigned) {}
+ virtual void LRE_WillShrinkVirtReg(Register) {}
/// Called after cloning a virtual register.
/// This is used for new registers representing connected components of Old.
- virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+ virtual void LRE_DidCloneVirtReg(Register New, Register Old) {}
};
private:
LiveInterval *Parent;
- SmallVectorImpl<unsigned> &NewRegs;
+ SmallVectorImpl<Register> &NewRegs;
MachineRegisterInfo &MRI;
LiveIntervals &LIS;
VirtRegMap *VRM;
@@ -94,7 +94,7 @@
SmallPtrSet<const VNInfo *, 4> Rematted;
/// scanRemattable - Identify the Parent values that may rematerialize.
- void scanRemattable(AliasAnalysis *aa);
+ void scanRemattable(AAResults *aa);
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
/// OrigIdx are also available with the same value at UseIdx.
@@ -110,18 +110,18 @@
/// Helper for eliminateDeadDefs.
void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
- AliasAnalysis *AA);
+ AAResults *AA);
/// MachineRegisterInfo callback to notify when new virtual
/// registers are created.
- void MRI_NoteNewVirtualRegister(unsigned VReg) override;
+ void MRI_NoteNewVirtualRegister(Register VReg) override;
/// Check if MachineOperand \p MO is a last use/kill either in the
/// main live range of \p LI or in one of the matching subregister ranges.
bool useIsKill(const LiveInterval &LI, const MachineOperand &MO) const;
/// Create a new empty interval based on OldReg.
- LiveInterval &createEmptyIntervalFrom(unsigned OldReg, bool createSubRanges);
+ LiveInterval &createEmptyIntervalFrom(Register OldReg, bool createSubRanges);
public:
/// Create a LiveRangeEdit for breaking down parent into smaller pieces.
@@ -135,7 +135,7 @@
/// be done. This could be the case if called before Regalloc.
/// @param deadRemats The collection of all the instructions defining an
/// original reg and are dead after remat.
- LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs,
+ LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<Register> &newRegs,
MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
Delegate *delegate = nullptr,
SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr)
@@ -152,15 +152,15 @@
return *Parent;
}
- unsigned getReg() const { return getParent().reg; }
+ Register getReg() const { return getParent().reg(); }
/// Iterator for accessing the new registers added by this edit.
- using iterator = SmallVectorImpl<unsigned>::const_iterator;
+ using iterator = SmallVectorImpl<Register>::const_iterator;
iterator begin() const { return NewRegs.begin() + FirstNew; }
iterator end() const { return NewRegs.end(); }
unsigned size() const { return NewRegs.size() - FirstNew; }
bool empty() const { return size() == 0; }
- unsigned get(unsigned idx) const { return NewRegs[idx + FirstNew]; }
+ Register get(unsigned idx) const { return NewRegs[idx + FirstNew]; }
/// pop_back - It allows LiveRangeEdit users to drop new registers.
/// The context is when an original def instruction of a register is
@@ -172,12 +172,12 @@
/// we want to drop it from the NewRegs set.
void pop_back() { NewRegs.pop_back(); }
- ArrayRef<unsigned> regs() const {
+ ArrayRef<Register> regs() const {
return makeArrayRef(NewRegs).slice(FirstNew);
}
/// createFrom - Create a new virtual register based on OldReg.
- unsigned createFrom(unsigned OldReg);
+ Register createFrom(Register OldReg);
/// create - Create a new register with the same class and original slot as
/// parent.
@@ -185,17 +185,17 @@
return createEmptyIntervalFrom(getReg(), true);
}
- unsigned create() { return createFrom(getReg()); }
+ Register create() { return createFrom(getReg()); }
/// anyRematerializable - Return true if any parent values may be
/// rematerializable.
/// This function must be called before any rematerialization is attempted.
- bool anyRematerializable(AliasAnalysis *);
+ bool anyRematerializable(AAResults *);
/// checkRematerializable - Manually add VNI to the list of rematerializable
/// values if DefMI may be rematerializable.
bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
- AliasAnalysis *);
+ AAResults *);
/// Remat - Information needed to rematerialize at a specific location.
struct Remat {
@@ -234,7 +234,7 @@
/// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
/// to erase it from LIS.
- void eraseVirtReg(unsigned Reg);
+ void eraseVirtReg(Register Reg);
/// eliminateDeadDefs - Try to delete machine instructions that are now dead
/// (allDefsAreDead returns true). This may cause live intervals to be trimmed
@@ -243,8 +243,8 @@
/// allocator. These registers should not be split into new intervals
/// as currently those new intervals are not guaranteed to spill.
void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
- ArrayRef<unsigned> RegsBeingSpilled = None,
- AliasAnalysis *AA = nullptr);
+ ArrayRef<Register> RegsBeingSpilled = None,
+ AAResults *AA = nullptr);
/// calculateRegClassAndHint - Recompute register class and hint for each new
/// register.
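To illustrate the unsigned-to-Register migration in the Delegate interface above, a hedged sketch of a delegate subclass; the SpillBookkeeping name and its behavior are hypothetical:

struct SpillBookkeeping : LiveRangeEdit::Delegate {
  // Defer erasure so the caller can update its own maps first.
  bool LRE_CanEraseVirtReg(Register) override { return false; }
  // Invalidate any cached analysis for a register about to shrink.
  void LRE_WillShrinkVirtReg(Register Reg) override { /* drop caches for Reg */ }
  // Propagate per-register metadata from the original to the clone.
  void LRE_DidCloneVirtReg(Register New, Register Old) override {}
};
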
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveRegMatrix.h b/linux-x64/clang/include/llvm/CodeGen/LiveRegMatrix.h
index ab4d44f..fc67bce 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveRegMatrix.h
@@ -104,19 +104,19 @@
/// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
/// When there is more than one kind of interference, the InterferenceKind
/// with the highest enum value is returned.
- InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg);
+ InterferenceKind checkInterference(LiveInterval &VirtReg, MCRegister PhysReg);
/// Check for interference in the segment [Start, End) that may prevent
/// assignment to PhysReg. If this function returns true, there is
/// interference in the segment [Start, End) of some other interval already
/// assigned to PhysReg. If this function returns false, PhysReg is free at
/// the segment [Start, End).
- bool checkInterference(SlotIndex Start, SlotIndex End, unsigned PhysReg);
+ bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);
/// Assign VirtReg to PhysReg.
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
/// update VirtRegMap. The live range is expected to be available in PhysReg.
- void assign(LiveInterval &VirtReg, unsigned PhysReg);
+ void assign(LiveInterval &VirtReg, MCRegister PhysReg);
/// Unassign VirtReg from its PhysReg.
/// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
@@ -124,7 +124,7 @@
void unassign(LiveInterval &VirtReg);
/// Returns true if the given \p PhysReg has any live intervals assigned.
- bool isPhysRegUsed(unsigned PhysReg) const;
+ bool isPhysRegUsed(MCRegister PhysReg) const;
//===--------------------------------------------------------------------===//
// Low-level interface.
@@ -136,22 +136,25 @@
/// Check for regmask interference only.
/// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
/// If PhysReg is null, check if VirtReg crosses any regmask operands.
- bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0);
+ bool checkRegMaskInterference(LiveInterval &VirtReg,
+ MCRegister PhysReg = MCRegister::NoRegister);
/// Check for regunit interference only.
/// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's
/// register units.
- bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg);
+ bool checkRegUnitInterference(LiveInterval &VirtReg, MCRegister PhysReg);
/// Query a line of the assigned virtual register matrix directly.
/// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
/// This returns a reference to an internal Query data structure that is only
/// valid until the next query() call.
- LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit);
+ LiveIntervalUnion::Query &query(const LiveRange &LR, MCRegister RegUnit);
/// Directly access the live interval unions per regunit.
/// This returns an array indexed by the regunit number.
LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; }
+
+ Register getOneVReg(unsigned PhysReg) const;
};
} // end namespace llvm
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveRegUnits.h b/linux-x64/clang/include/llvm/CodeGen/LiveRegUnits.h
index 7dbb2fe..39a1ec4 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveRegUnits.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveRegUnits.h
@@ -15,7 +15,7 @@
#define LLVM_CODEGEN_LIVEREGUNITS_H
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -53,8 +53,8 @@
ModifiedRegUnits.addRegsInMask(O->getRegMask());
if (!O->isReg())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Reg.isPhysical())
continue;
if (O->isDef()) {
// Some architectures (e.g. AArch64 XZR/WZR) have registers that are
@@ -67,7 +67,6 @@
UsedRegUnits.addReg(Reg);
}
}
- return;
}
/// Initialize and clear the set.
@@ -160,6 +159,19 @@
void addPristines(const MachineFunction &MF);
};
+/// Returns an iterator range over all physical register and mask operands for
+/// \p MI and bundled instructions. This also skips any debug operands.
+inline iterator_range<filter_iterator<
+ ConstMIBundleOperands, std::function<bool(const MachineOperand &)>>>
+phys_regs_and_masks(const MachineInstr &MI) {
+ std::function<bool(const MachineOperand &)> Pred =
+ [](const MachineOperand &MOP) {
+ return MOP.isRegMask() || (MOP.isReg() && !MOP.isDebug() &&
+ Register::isPhysicalRegister(MOP.getReg()));
+ };
+ return make_filter_range(const_mi_bundle_ops(MI), Pred);
+}
+
} // end namespace llvm
#endif // LLVM_CODEGEN_LIVEREGUNITS_H
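As a usage sketch for the new phys_regs_and_masks helper (the accumulateTouched name is hypothetical), this mirrors how callers typically fold regmask operands and plain physical-register operands into a LiveRegUnits set:

// Add every physical register MI (and its bundle) touches to LRU; regmask
// operands contribute the registers they clobber.
static void accumulateTouched(const MachineInstr &MI, LiveRegUnits &LRU) {
  for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
    if (MOP.isRegMask())
      LRU.addRegsInMask(MOP.getRegMask());
    else
      LRU.addReg(MOP.getReg());
  }
}
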
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveStacks.h b/linux-x64/clang/include/llvm/CodeGen/LiveStacks.h
index 7c4c64d..1cbdb8b 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveStacks.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveStacks.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <cassert>
#include <map>
diff --git a/linux-x64/clang/include/llvm/CodeGen/LiveVariables.h b/linux-x64/clang/include/llvm/CodeGen/LiveVariables.h
index 71de306..9b0667b 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LiveVariables.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LiveVariables.h
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
namespace llvm {
@@ -104,8 +105,7 @@
/// isLiveIn - Is Reg live in to MBB? This means that Reg is live through
/// MBB, or it is killed in MBB. If Reg is only used by PHI instructions in
/// MBB, it is not considered live in.
- bool isLiveIn(const MachineBasicBlock &MBB,
- unsigned Reg,
+ bool isLiveIn(const MachineBasicBlock &MBB, Register Reg,
MachineRegisterInfo &MRI);
void dump() const;
@@ -148,25 +148,25 @@
/// HandlePhysRegKill - Add kills of Reg and its sub-registers to the
/// uses. Pay special attention to the sub-register uses which may come below
/// the last use of the whole register.
- bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI);
+ bool HandlePhysRegKill(Register Reg, MachineInstr *MI);
/// HandleRegMask - Call HandlePhysRegKill for all registers clobbered by Mask.
void HandleRegMask(const MachineOperand&);
- void HandlePhysRegUse(unsigned Reg, MachineInstr &MI);
- void HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ void HandlePhysRegUse(Register Reg, MachineInstr &MI);
+ void HandlePhysRegDef(Register Reg, MachineInstr *MI,
SmallVectorImpl<unsigned> &Defs);
void UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs);
/// FindLastRefOrPartRef - Return the last reference or partial reference of
/// the specified register.
- MachineInstr *FindLastRefOrPartRef(unsigned Reg);
+ MachineInstr *FindLastRefOrPartRef(Register Reg);
/// FindLastPartialDef - Return the last partial def of the specified
/// register. Also returns the sub-registers that are defined by the
/// instruction.
- MachineInstr *FindLastPartialDef(unsigned Reg,
- SmallSet<unsigned,4> &PartDefRegs);
+ MachineInstr *FindLastPartialDef(Register Reg,
+ SmallSet<unsigned, 4> &PartDefRegs);
/// analyzePHINodes - Gather information about the PHI nodes in here. In
/// particular, we want to map the variable information of a virtual
@@ -183,21 +183,21 @@
/// RegisterDefIsDead - Return true if the specified instruction defines the
/// specified register, but that definition is dead.
- bool RegisterDefIsDead(MachineInstr &MI, unsigned Reg) const;
+ bool RegisterDefIsDead(MachineInstr &MI, Register Reg) const;
//===--------------------------------------------------------------------===//
// API to update live variable information
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
- void replaceKillInstruction(unsigned Reg, MachineInstr &OldMI,
+ void replaceKillInstruction(Register Reg, MachineInstr &OldMI,
MachineInstr &NewMI);
/// addVirtualRegisterKilled - Add information about the fact that the
/// specified register is killed after being used by the specified
/// instruction. If AddIfNotFound is true, add an implicit operand if it's
/// not found.
- void addVirtualRegisterKilled(unsigned IncomingReg, MachineInstr &MI,
+ void addVirtualRegisterKilled(Register IncomingReg, MachineInstr &MI,
bool AddIfNotFound = false) {
if (MI.addRegisterKilled(IncomingReg, TRI, AddIfNotFound))
getVarInfo(IncomingReg).Kills.push_back(&MI);
@@ -207,14 +207,14 @@
/// register from the live variable information. Returns true if the
/// variable was marked as killed by the specified instruction,
/// false otherwise.
- bool removeVirtualRegisterKilled(unsigned reg, MachineInstr &MI) {
- if (!getVarInfo(reg).removeKill(MI))
+ bool removeVirtualRegisterKilled(Register Reg, MachineInstr &MI) {
+ if (!getVarInfo(Reg).removeKill(MI))
return false;
bool Removed = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isKill() && MO.getReg() == reg) {
+ if (MO.isReg() && MO.isKill() && MO.getReg() == Reg) {
MO.setIsKill(false);
Removed = true;
break;
@@ -233,7 +233,7 @@
/// addVirtualRegisterDead - Add information about the fact that the specified
/// register is dead after being used by the specified instruction. If
/// AddIfNotFound is true, add an implicit operand if it's not found.
- void addVirtualRegisterDead(unsigned IncomingReg, MachineInstr &MI,
+ void addVirtualRegisterDead(Register IncomingReg, MachineInstr &MI,
bool AddIfNotFound = false) {
if (MI.addRegisterDead(IncomingReg, TRI, AddIfNotFound))
getVarInfo(IncomingReg).Kills.push_back(&MI);
@@ -243,14 +243,14 @@
/// register from the live variable information. Returns true if the
/// variable was marked dead at the specified instruction, false
/// otherwise.
- bool removeVirtualRegisterDead(unsigned reg, MachineInstr &MI) {
- if (!getVarInfo(reg).removeKill(MI))
+ bool removeVirtualRegisterDead(Register Reg, MachineInstr &MI) {
+ if (!getVarInfo(Reg).removeKill(MI))
return false;
bool Removed = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.getReg() == reg) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
MO.setIsDead(false);
Removed = true;
break;
@@ -269,24 +269,25 @@
/// getVarInfo - Return the VarInfo structure for the specified VIRTUAL
/// register.
- VarInfo &getVarInfo(unsigned RegIdx);
+ VarInfo &getVarInfo(Register Reg);
void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock,
MachineBasicBlock *BB);
- void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock,
+ void MarkVirtRegAliveInBlock(VarInfo &VRInfo, MachineBasicBlock *DefBlock,
MachineBasicBlock *BB,
- std::vector<MachineBasicBlock*> &WorkList);
- void HandleVirtRegDef(unsigned reg, MachineInstr &MI);
- void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, MachineInstr &MI);
+ SmallVectorImpl<MachineBasicBlock *> &WorkList);
- bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB) {
+ void HandleVirtRegDef(Register reg, MachineInstr &MI);
+ void HandleVirtRegUse(Register reg, MachineBasicBlock *MBB, MachineInstr &MI);
+
+ bool isLiveIn(Register Reg, const MachineBasicBlock &MBB) {
return getVarInfo(Reg).isLiveIn(MBB, Reg, *MRI);
}
/// isLiveOut - Determine if Reg is live out from MBB, when not considering
/// PHI nodes. This means that Reg is either killed by a successor block or
/// passed through one.
- bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB);
+ bool isLiveOut(Register Reg, const MachineBasicBlock &MBB);
/// addNewBlock - Add a new basic block BB between DomBB and SuccBB. All
/// variables that are live out of DomBB and live into SuccBB will be marked
@@ -296,11 +297,16 @@
MachineBasicBlock *DomBB,
MachineBasicBlock *SuccBB);
+ void addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB,
+ std::vector<SparseBitVector<>> &LiveInSets);
+
/// isPHIJoin - Return true if Reg is a phi join register.
- bool isPHIJoin(unsigned Reg) { return PHIJoins.test(Reg); }
+ bool isPHIJoin(Register Reg) { return PHIJoins.test(Reg.id()); }
/// setPHIJoin - Mark Reg as a phi join register.
- void setPHIJoin(unsigned Reg) { PHIJoins.set(Reg); }
+ void setPHIJoin(Register Reg) { PHIJoins.set(Reg.id()); }
};
} // End llvm namespace
diff --git a/linux-x64/clang/include/llvm/CodeGen/LowLevelType.h b/linux-x64/clang/include/llvm/CodeGen/LowLevelType.h
index 687233e..402fa2c 100644
--- a/linux-x64/clang/include/llvm/CodeGen/LowLevelType.h
+++ b/linux-x64/clang/include/llvm/CodeGen/LowLevelType.h
@@ -17,15 +17,28 @@
#define LLVM_CODEGEN_LOWLEVELTYPE_H
#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MachineValueType.h"
namespace llvm {
class DataLayout;
class Type;
+struct fltSemantics;
/// Construct a low-level type based on an LLVM type.
LLT getLLTForType(Type &Ty, const DataLayout &DL);
+/// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish
+/// pointers, so these will convert to a plain integer.
+MVT getMVTForLLT(LLT Ty);
+
+/// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support
+/// scalable vector types, and will assert if used.
+LLT getLLTForMVT(MVT Ty);
+
+/// Get the appropriate floating-point arithmetic semantics based on the bit size
+/// of the given scalar LLT.
+const llvm::fltSemantics &getFltSemanticForLLT(LLT Ty);
}
#endif // LLVM_CODEGEN_LOWLEVELTYPE_H
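A small sketch of the new LLT/MVT bridging helpers; the results noted in comments follow directly from the doc comments above:

LLT S32 = LLT::scalar(32);
MVT M32 = getMVTForLLT(S32);   // MVT::i32
LLT Back = getLLTForMVT(M32);  // LLT::scalar(32) again

// Pointers lose their pointer-ness: MVT cannot distinguish them, so a
// 64-bit pointer in address space 0 converts to a plain 64-bit integer.
LLT P0 = LLT::pointer(0, 64);
MVT AsInt = getMVTForLLT(P0);  // MVT::i64

// Pick the matching APFloat semantics for a scalar of a given width.
const fltSemantics &Sem = getFltSemanticForLLT(LLT::scalar(32)); // IEEE single
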
diff --git a/linux-x64/clang/include/llvm/CodeGen/MBFIWrapper.h b/linux-x64/clang/include/llvm/CodeGen/MBFIWrapper.h
new file mode 100644
index 0000000..bcbf3ee
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MBFIWrapper.h
@@ -0,0 +1,48 @@
+//===- llvm/CodeGen/MBFIWrapper.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class keeps track of branch frequencies of newly created blocks and
+// tail-merged blocks. Used by TailDuplication and MachineBlockPlacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MBFIWRAPPER_H
+#define LLVM_CODEGEN_MBFIWRAPPER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/BlockFrequency.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+
+class MBFIWrapper {
+ public:
+ MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
+
+ BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
+ void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
+ Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const;
+
+ raw_ostream &printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const;
+ raw_ostream &printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const;
+ void view(const Twine &Name, bool isSimple = true);
+ uint64_t getEntryFreq() const;
+ const MachineBlockFrequencyInfo &getMBFI() { return MBFI; }
+
+ private:
+ const MachineBlockFrequencyInfo &MBFI;
+ DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MBFIWRAPPER_H
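A hedged sketch of how a layout pass might use MBFIWrapper; NewBB and PredBB are hypothetical blocks created or queried by the pass:

MBFIWrapper MBFIW(MBFI);

// Newly created blocks are unknown to the underlying analysis, so record a
// frequency for them in the wrapper's overlay map.
BlockFrequency F = MBFIW.getBlockFreq(PredBB);
MBFIW.setBlockFreq(NewBB, F);

// Later queries on the wrapper see the overlaid value; the underlying
// analysis itself is left untouched.
MBFIW.printBlockFreq(errs(), NewBB);
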
diff --git a/linux-x64/clang/include/llvm/CodeGen/MIRFormatter.h b/linux-x64/clang/include/llvm/CodeGen/MIRFormatter.h
new file mode 100644
index 0000000..9cb9209
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MIRFormatter.h
@@ -0,0 +1,87 @@
+//===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MIRFormatter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MIRFORMATTER_H
+#define LLVM_CODEGEN_MIRFORMATTER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+struct PerFunctionMIParsingState;
+struct SlotMapping;
+
+/// MIRFormatter - Interface to format MIR operands in a target-specific way.
+class MIRFormatter {
+public:
+ typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
+ ErrorCallbackType;
+
+ MIRFormatter() {}
+ virtual ~MIRFormatter() = default;
+
+ /// Implement target-specific printing for a machine operand's immediate value,
+ /// so that we can print a more meaningful mnemonic than a raw 64-bit integer.
+ /// Passing None to OpIdx means the index is unknown.
+ virtual void printImm(raw_ostream &OS, const MachineInstr &MI,
+ Optional<unsigned> OpIdx, int64_t Imm) const {
+ OS << Imm;
+ }
+
+ /// Implement target-specific parsing of immediate mnemonics. The mnemonic is
+ /// a dot-separated string.
+ virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
+ StringRef Src, int64_t &Imm,
+ ErrorCallbackType ErrorCallback) const {
+ llvm_unreachable("target did not implement parsing MIR immediate mnemonic");
+ }
+
+ /// Implement target-specific printing of a target's custom pseudo source value.
+ /// The default implementation does not necessarily produce the correct MIR
+ /// serialization format.
+ virtual void
+ printCustomPseudoSourceValue(raw_ostream &OS, ModuleSlotTracker &MST,
+ const PseudoSourceValue &PSV) const {
+ PSV.printCustom(OS);
+ }
+
+ /// Implement target specific parsing of target custom pseudo source value.
+ virtual bool parseCustomPseudoSourceValue(
+ StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {
+ llvm_unreachable(
+ "target did not implement parsing MIR custom pseudo source value");
+ }
+
+ /// Helper function to print an IR value in MIR serialization format. Useful
+ /// for target-specific printers, e.g. for printing an IR value inside a
+ /// custom pseudo source value.
+ static void printIRValue(raw_ostream &OS, const Value &V,
+ ModuleSlotTracker &MST);
+
+ /// Helper function to parse an IR value from MIR serialization format. Useful
+ /// for target-specific parsers, e.g. for parsing an IR value for a custom
+ /// pseudo source value.
+ static bool parseIRValue(StringRef Src, MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const Value *&V,
+ ErrorCallbackType ErrorCallback);
+};
+
+} // end namespace llvm
+
+#endif
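A sketch of a target hooking the printImm callback above; MyTarget::COND_BR and the "always" mnemonic are hypothetical:

class MyTargetMIRFormatter : public MIRFormatter {
  void printImm(raw_ostream &OS, const MachineInstr &MI,
                Optional<unsigned> OpIdx, int64_t Imm) const override {
    // Print a symbolic mnemonic for a known immediate encoding; otherwise
    // fall back to the default 64-bit integer form.
    if (MI.getOpcode() == MyTarget::COND_BR && Imm == 0)
      OS << "always";
    else
      MIRFormatter::printImm(OS, MI, OpIdx, Imm);
  }
};
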
diff --git a/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIParser.h b/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIParser.h
index 4e32a04..590b3dc 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Allocator.h"
namespace llvm {
@@ -40,8 +41,8 @@
const TargetRegisterClass *RC;
const RegisterBank *RegBank;
} D;
- unsigned VReg;
- unsigned PreferredReg = 0;
+ Register VReg;
+ Register PreferredReg;
};
using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
@@ -55,7 +56,7 @@
StringMap<unsigned> Names2InstrOpCodes;
/// Maps from register names to registers.
- StringMap<unsigned> Names2Regs;
+ StringMap<Register> Names2Regs;
/// Maps from register mask names to register masks.
StringMap<const uint32_t *> Names2RegMasks;
@@ -100,7 +101,7 @@
/// Try to convert a register name to a register number. Return true if the
/// register name is invalid.
- bool getRegisterByName(StringRef RegName, unsigned &Reg);
+ bool getRegisterByName(StringRef RegName, Register &Reg);
/// Check if the given identifier is a name of a register mask.
///
@@ -164,19 +165,23 @@
PerTargetMIParsingState &Target;
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
- DenseMap<unsigned, VRegInfo *> VRegInfos;
+ DenseMap<Register, VRegInfo *> VRegInfos;
StringMap<VRegInfo *> VRegInfosNamed;
DenseMap<unsigned, int> FixedStackObjectSlots;
DenseMap<unsigned, int> StackObjectSlots;
DenseMap<unsigned, unsigned> ConstantPoolSlots;
DenseMap<unsigned, unsigned> JumpTableSlots;
+ /// Maps from slot numbers to the function's unnamed values.
+ DenseMap<unsigned, const Value *> Slots2Values;
+
PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
const SlotMapping &IRSlots,
PerTargetMIParsingState &Target);
- VRegInfo &getVRegInfo(unsigned Num);
+ VRegInfo &getVRegInfo(Register Num);
VRegInfo &getVRegInfoNamed(StringRef RegName);
+ const Value *getIRValue(unsigned Slot);
};
/// Parse the machine basic block definitions, and skip the machine
@@ -212,10 +217,10 @@
SMDiagnostic &Error);
bool parseRegisterReference(PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
+ Register &Reg, StringRef Src,
SMDiagnostic &Error);
-bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
+bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg,
StringRef Src, SMDiagnostic &Error);
bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
diff --git a/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIRParser.h b/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIRParser.h
index 6a04e48..a7c69e2 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -23,10 +23,14 @@
namespace llvm {
-class StringRef;
+class Function;
class MIRParserImpl;
class MachineModuleInfo;
class SMDiagnostic;
+class StringRef;
+
+typedef llvm::function_ref<Optional<std::string>(StringRef)>
+ DataLayoutCallbackTy;
/// This class initializes machine functions by applying the state loaded from
/// a MIR file.
@@ -42,7 +46,8 @@
///
/// A new, empty module is created if the LLVM IR isn't present.
/// \returns nullptr if a parsing error occurred.
- std::unique_ptr<Module> parseIRModule();
+ std::unique_ptr<Module> parseIRModule(
+ DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; });
/// Parses MachineFunctions in the MIR file and add them to the given
/// MachineModuleInfo \p MMI.
@@ -60,9 +65,11 @@
/// \param Filename - The name of the file to parse.
/// \param Error - Error result info.
/// \param Context - Context which will be used for the parsed LLVM IR module.
-std::unique_ptr<MIRParser> createMIRParserFromFile(StringRef Filename,
- SMDiagnostic &Error,
- LLVMContext &Context);
+/// \param ProcessIRFunction - function to run on every IR function or stub
+/// loaded from the MIR file.
+std::unique_ptr<MIRParser> createMIRParserFromFile(
+ StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction = nullptr);
/// This function is another interface to the MIR serialization format parser.
///
@@ -73,7 +80,8 @@
/// \param Contents - The MemoryBuffer containing the machine level IR.
/// \param Context - Context which will be used for the parsed LLVM IR module.
std::unique_ptr<MIRParser>
-createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context);
+createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction = nullptr);
} // end namespace llvm
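A sketch of the new ProcessIRFunction hook; the file name and the attribute applied are arbitrary and purely illustrative:

SMDiagnostic Err;
LLVMContext Ctx;
std::unique_ptr<MIRParser> Parser = createMIRParserFromFile(
    "f.mir", Err, Ctx,
    [](Function &F) { F.addFnAttr("illustrative-attr"); });
if (Parser)
  std::unique_ptr<Module> M = Parser->parseIRModule(); // default DL callback
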
diff --git a/linux-x64/clang/include/llvm/CodeGen/MIRYamlMapping.h b/linux-x64/clang/include/llvm/CodeGen/MIRYamlMapping.h
index 94e76a7..4a74064 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MIRYamlMapping.h
@@ -142,6 +142,39 @@
}
};
+template <> struct ScalarTraits<MaybeAlign> {
+ static void output(const MaybeAlign &Alignment, void *,
+ llvm::raw_ostream &out) {
+ out << uint64_t(Alignment ? Alignment->value() : 0U);
+ }
+ static StringRef input(StringRef Scalar, void *, MaybeAlign &Alignment) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 10, n))
+ return "invalid number";
+ if (n > 0 && !isPowerOf2_64(n))
+ return "must be 0 or a power of two";
+ Alignment = MaybeAlign(n);
+ return StringRef();
+ }
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
+template <> struct ScalarTraits<Align> {
+ static void output(const Align &Alignment, void *, llvm::raw_ostream &OS) {
+ OS << Alignment.value();
+ }
+ static StringRef input(StringRef Scalar, void *, Align &Alignment) {
+ unsigned long long N;
+ if (getAsUnsignedInteger(Scalar, 10, N))
+ return "invalid number";
+ if (!isPowerOf2_64(N))
+ return "must be a power of two";
+ Alignment = Align(N);
+ return StringRef();
+ }
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
} // end namespace yaml
} // end namespace llvm
@@ -212,7 +245,7 @@
ObjectType Type = DefaultType;
int64_t Offset = 0;
uint64_t Size = 0;
- unsigned Alignment = 0;
+ MaybeAlign Alignment = None;
TargetStackID::Value StackID;
StringValue CalleeSavedRegister;
bool CalleeSavedRestored = true;
@@ -252,7 +285,7 @@
YamlIO.mapOptional("offset", Object.Offset, (int64_t)0);
if (Object.Type != MachineStackObject::VariableSized)
YamlIO.mapRequired("size", Object.Size);
- YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
+ YamlIO.mapOptional("alignment", Object.Alignment, None);
YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default);
YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister,
StringValue()); // Don't print it out when it's empty.
@@ -278,7 +311,7 @@
ObjectType Type = DefaultType;
int64_t Offset = 0;
uint64_t Size = 0;
- unsigned Alignment = 0;
+ MaybeAlign Alignment = None;
TargetStackID::Value StackID;
bool IsImmutable = false;
bool IsAliased = false;
@@ -314,6 +347,7 @@
static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) {
IO.enumCase(ID, "default", TargetStackID::Default);
IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
+ IO.enumCase(ID, "scalable-vector", TargetStackID::ScalableVector);
IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
}
};
@@ -326,7 +360,7 @@
FixedMachineStackObject::DefaultType); // Don't print the default type.
YamlIO.mapOptional("offset", Object.Offset, (int64_t)0);
YamlIO.mapOptional("size", Object.Size, (uint64_t)0);
- YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
+ YamlIO.mapOptional("alignment", Object.Alignment, None);
YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default);
if (Object.Type != FixedMachineStackObject::SpillSlot) {
YamlIO.mapOptional("isImmutable", Object.IsImmutable, false);
@@ -407,10 +441,40 @@
static const bool flow = true;
};
+/// Serializable representation of debug value substitutions.
+struct DebugValueSubstitution {
+ unsigned SrcInst;
+ unsigned SrcOp;
+ unsigned DstInst;
+ unsigned DstOp;
+
+ bool operator==(const DebugValueSubstitution &Other) const {
+ return std::tie(SrcInst, SrcOp, DstInst, DstOp) ==
+ std::tie(Other.SrcInst, Other.SrcOp, Other.DstInst, Other.DstOp);
+ }
+};
+
+template <> struct MappingTraits<DebugValueSubstitution> {
+ static void mapping(IO &YamlIO, DebugValueSubstitution &Sub) {
+ YamlIO.mapRequired("srcinst", Sub.SrcInst);
+ YamlIO.mapRequired("srcop", Sub.SrcOp);
+ YamlIO.mapRequired("dstinst", Sub.DstInst);
+ YamlIO.mapRequired("dstop", Sub.DstOp);
+ }
+
+ static const bool flow = true;
+};
+} // namespace yaml
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::DebugValueSubstitution)
+
+namespace llvm {
+namespace yaml {
struct MachineConstantPoolValue {
UnsignedValue ID;
StringValue Value;
- unsigned Alignment = 0;
+ MaybeAlign Alignment = None;
bool IsTargetSpecific = false;
bool operator==(const MachineConstantPoolValue &Other) const {
@@ -424,7 +488,7 @@
static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) {
YamlIO.mapRequired("id", Constant.ID);
YamlIO.mapOptional("value", Constant.Value, StringValue());
- YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0);
+ YamlIO.mapOptional("alignment", Constant.Alignment, None);
YamlIO.mapOptional("isTargetSpecific", Constant.IsTargetSpecific, false);
}
};
@@ -570,7 +634,7 @@
struct MachineFunction {
StringRef Name;
- unsigned Alignment = 0;
+ MaybeAlign Alignment = None;
bool ExposesReturnsTwice = false;
// GISel MachineFunctionProperties.
bool Legalized = false;
@@ -591,6 +655,7 @@
std::vector<MachineConstantPoolValue> Constants; /// Constant pool.
std::unique_ptr<MachineFunctionInfo> MachineFuncInfo;
std::vector<CallSiteInfo> CallSitesInfo;
+ std::vector<DebugValueSubstitution> DebugValueSubstitutions;
MachineJumpTable JumpTableInfo;
BlockStringValue Body;
};
@@ -598,7 +663,7 @@
template <> struct MappingTraits<MachineFunction> {
static void mapping(IO &YamlIO, MachineFunction &MF) {
YamlIO.mapRequired("name", MF.Name);
- YamlIO.mapOptional("alignment", MF.Alignment, (unsigned)0);
+ YamlIO.mapOptional("alignment", MF.Alignment, None);
YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false);
YamlIO.mapOptional("legalized", MF.Legalized, false);
YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false);
@@ -619,6 +684,8 @@
std::vector<MachineStackObject>());
YamlIO.mapOptional("callSites", MF.CallSitesInfo,
std::vector<CallSiteInfo>());
+ YamlIO.mapOptional("debugValueSubstitutions", MF.DebugValueSubstitutions,
+ std::vector<DebugValueSubstitution>());
YamlIO.mapOptional("constants", MF.Constants,
std::vector<MachineConstantPoolValue>());
YamlIO.mapOptional("machineFunctionInfo", MF.MachineFuncInfo);
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineBasicBlock.h b/linux-x64/clang/include/llvm/CodeGen/MachineBasicBlock.h
index 333d0a7..2bad64c 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineBasicBlock.h
@@ -15,16 +15,13 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/simple_ilist.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/Printable.h"
#include <cassert>
#include <cstdint>
#include <functional>
@@ -39,12 +36,45 @@
class MCSymbol;
class ModuleSlotTracker;
class Pass;
+class Printable;
class SlotIndexes;
class StringRef;
class raw_ostream;
+class LiveIntervals;
class TargetRegisterClass;
class TargetRegisterInfo;
+// This structure uniquely identifies a basic block section.
+// Possible values are
+// {Type: Default, Number: (unsigned)} (These are regular section IDs)
+// {Type: Exception, Number: 0} (ExceptionSectionID)
+// {Type: Cold, Number: 0} (ColdSectionID)
+struct MBBSectionID {
+ enum SectionType {
+ Default = 0, // Regular section (these sections are distinguished by the
+ // Number field).
+ Exception, // Special section type for exception handling blocks
+ Cold, // Special section type for cold blocks
+ } Type;
+ unsigned Number;
+
+ MBBSectionID(unsigned N) : Type(Default), Number(N) {}
+
+ // Special unique sections for cold and exception blocks.
+ const static MBBSectionID ColdSectionID;
+ const static MBBSectionID ExceptionSectionID;
+
+ bool operator==(const MBBSectionID &Other) const {
+ return Type == Other.Type && Number == Other.Number;
+ }
+
+ bool operator!=(const MBBSectionID &Other) const { return !(*this == Other); }
+
+private:
+ // This is only used to construct the special cold and exception sections.
+ MBBSectionID(SectionType T) : Type(T), Number(0) {}
+};
+
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -103,9 +133,9 @@
using LiveInVector = std::vector<RegisterMaskPair>;
LiveInVector LiveIns;
- /// Alignment of the basic block. Zero if the basic block does not need to be
- /// aligned. The alignment is specified as log2(bytes).
- unsigned Alignment = 0;
+ /// Alignment of the basic block. One if the basic block does not need to be
+ /// aligned.
+ Align Alignment;
/// Indicate that this basic block is entered via an exception handler.
bool IsEHPad = false;
@@ -129,10 +159,26 @@
/// Indicate that this basic block is the entry block of a cleanup funclet.
bool IsCleanupFuncletEntry = false;
+ /// With basic block sections, this stores the Section ID of the basic block.
+ MBBSectionID SectionID{0};
+
+ // Indicate that this basic block begins a section.
+ bool IsBeginSection = false;
+
+ // Indicate that this basic block ends a section.
+ bool IsEndSection = false;
+
+ /// Indicate that this basic block is the indirect dest of an INLINEASM_BR.
+ bool IsInlineAsmBrIndirectTarget = false;
+
/// Since getSymbol is a relatively heavy-weight operation, the symbol
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
+ /// Marks the end of the basic block. Used during basic block sections to
+ /// calculate the size of the basic block, or the BB section ending with it.
+ mutable MCSymbol *CachedEndMCSymbol = nullptr;
+
// Intrusive list support
MachineBasicBlock() = default;
@@ -312,7 +358,7 @@
/// Adds the specified register as a live in. Note that it is an error to add
/// the same register to the same set more than once unless the intention is
/// to call sortUniqueLiveIns after all registers are added.
- void addLiveIn(MCPhysReg PhysReg,
+ void addLiveIn(MCRegister PhysReg,
LaneBitmask LaneMask = LaneBitmask::getAll()) {
LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask));
}
@@ -331,7 +377,7 @@
/// Add PhysReg as live in to this block, and ensure that there is a copy of
/// PhysReg to a virtual register of class RC. Return the virtual register
/// that is a copy of the live in PhysReg.
- unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC);
+ Register addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC);
/// Remove the specified register from the live in set.
void removeLiveIn(MCPhysReg Reg,
@@ -372,13 +418,11 @@
/// \see getBeginClobberMask()
const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const;
- /// Return alignment of the basic block. The alignment is specified as
- /// log2(bytes).
- unsigned getAlignment() const { return Alignment; }
+ /// Return alignment of the basic block.
+ Align getAlignment() const { return Alignment; }
- /// Set alignment of the basic block. The alignment is specified as
- /// log2(bytes).
- void setAlignment(unsigned Align) { Alignment = Align; }
+ /// Set alignment of the basic block.
+ void setAlignment(Align A) { Alignment = A; }
/// Returns true if the block is a landing pad. That is this basic block is
/// entered via an exception handler.
@@ -390,6 +434,9 @@
bool hasEHPadSuccessor() const;
+ /// Returns true if this is the entry block of the function.
+ bool isEntryBlock() const;
+
/// Returns true if this is the entry block of an EH scope, i.e., the block
/// that used to have a catchpad or cleanuppad instruction in the LLVM IR.
bool isEHScopeEntry() const { return IsEHScopeEntry; }
@@ -410,6 +457,46 @@
/// Indicates if this is the entry block of a cleanup funclet.
void setIsCleanupFuncletEntry(bool V = true) { IsCleanupFuncletEntry = V; }
+ /// Returns true if this block begins any section.
+ bool isBeginSection() const { return IsBeginSection; }
+
+ /// Returns true if this block ends any section.
+ bool isEndSection() const { return IsEndSection; }
+
+ void setIsBeginSection(bool V = true) { IsBeginSection = V; }
+
+ void setIsEndSection(bool V = true) { IsEndSection = V; }
+
+ /// Returns the section ID of this basic block.
+ MBBSectionID getSectionID() const { return SectionID; }
+
+ /// Returns the unique section ID number of this basic block.
+ unsigned getSectionIDNum() const {
+ return ((unsigned)MBBSectionID::SectionType::Cold) -
+ ((unsigned)SectionID.Type) + SectionID.Number;
+ }
+
+ /// Sets the section ID for this basic block.
+ void setSectionID(MBBSectionID V) { SectionID = V; }
+
+ /// Returns the MCSymbol marking the end of this basic block.
+ MCSymbol *getEndSymbol() const;
+
+ /// Returns true if this block may have an INLINEASM_BR (overestimate, by
+ /// checking if any of the successors are indirect targets of any inlineasm_br
+ /// in the function).
+ bool mayHaveInlineAsmBr() const;
+
+ /// Returns true if this is the indirect dest of an INLINEASM_BR.
+ bool isInlineAsmBrIndirectTarget() const {
+ return IsInlineAsmBrIndirectTarget;
+ }
+
+ /// Indicates if this is the indirect dest of an INLINEASM_BR.
+ void setIsInlineAsmBrIndirectTarget(bool V = true) {
+ IsInlineAsmBrIndirectTarget = V;
+ }
+
/// Returns true if it is legal to hoist instructions into this block.
bool isLegalToHoistInto() const;
@@ -421,11 +508,18 @@
void moveBefore(MachineBasicBlock *NewAfter);
void moveAfter(MachineBasicBlock *NewBefore);
- /// Update the terminator instructions in block to account for changes to the
- /// layout. If the block previously used a fallthrough, it may now need a
- /// branch, and if it previously used branching it may now be able to use a
- /// fallthrough.
- void updateTerminator();
+ /// Returns true if this and MBB belong to the same section.
+ bool sameSection(const MachineBasicBlock *MBB) const {
+ return getSectionID() == MBB->getSectionID();
+ }
+
+ /// Update the terminator instructions in block to account for changes to
+ /// block layout which may have been made. PreviousLayoutSuccessor should be
+ /// set to the block which may have been used as fallthrough before the block
+ /// layout was modified. If the block previously fell through to that block,
+ /// it may now need a branch. If it previously branched to another block, it
+ /// may now be able to fall through to the current layout successor.
+ void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor);
// Machine-CFG mutators
@@ -585,12 +679,25 @@
return !empty() && back().isEHScopeReturn();
}
+ /// Split a basic block into two pieces at \p SplitInst. A new block will be
+ /// inserted after this block, and all instructions after \p SplitInst moved
+ /// to it (\p SplitInst will be in the original block). If \p LIS is provided,
+ /// LiveIntervals will be appropriately updated. \return the newly inserted
+ /// block.
+ ///
+ /// If \p UpdateLiveIns is true, this will ensure the live ins list is
+ /// accurate, including for physreg uses/defs in the original block.
+ MachineBasicBlock *splitAt(MachineInstr &SplitInst, bool UpdateLiveIns = true,
+ LiveIntervals *LIS = nullptr);
+
/// Split the critical edge from this block to the given successor block, and
/// return the newly created block, or null if splitting is not possible.
///
/// This function updates LiveVariables, MachineDominatorTree, and
/// MachineLoopInfo, as applicable.
- MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P);
+ MachineBasicBlock *
+ SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P,
+ std::vector<SparseBitVector<>> *LiveInSets = nullptr);
/// Check if the edge between this block and the given successor \p
/// Succ, can be split. If this returns true a subsequent call to
@@ -636,6 +743,18 @@
return Insts.insertAfter(I.getInstrIterator(), MI);
}
+ /// If I is bundled then insert MI into the instruction list after the end of
+ /// the bundle, otherwise insert MI immediately after I.
+ instr_iterator insertAfterBundle(instr_iterator I, MachineInstr *MI) {
+ assert((I == instr_end() || I->getParent() == this) &&
+ "iterator points outside of basic block");
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ while (I->isBundledWithSucc())
+ ++I;
+ return Insts.insertAfter(I, MI);
+ }
+
/// Remove an instruction from the instruction list and delete it.
///
/// If the instruction is part of a bundle, the other instructions in the
@@ -723,15 +842,9 @@
/// CFG so that it branches to 'New' instead.
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New);
- /// Various pieces of code can cause excess edges in the CFG to be inserted.
- /// If we have proven that MBB can only branch to DestA and DestB, remove any
- /// other MBB successors from the CFG. DestA and DestB can be null. Besides
- /// DestA and DestB, retain other edges leading to LandingPads (currently
- /// there can be only one; we don't check or require that here). Note it is
- /// possible that DestA and/or DestB are LandingPads.
- bool CorrectExtraCFGEdges(MachineBasicBlock *DestA,
- MachineBasicBlock *DestB,
- bool IsCond);
+ /// Update all phi nodes in this basic block to refer to basic block \p New
+ /// instead of basic block \p Old.
+ void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New);
/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
/// and DBG_LABEL instructions. Return UnknownLoc if there is none.
@@ -767,7 +880,7 @@
///
/// \p Reg must be a physical register.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI,
- unsigned Reg,
+ MCRegister Reg,
const_iterator Before,
unsigned Neighborhood = 10) const;
@@ -778,6 +891,14 @@
void print(raw_ostream &OS, ModuleSlotTracker &MST,
const SlotIndexes * = nullptr, bool IsStandalone = true) const;
+ enum PrintNameFlag {
+ PrintNameIr = (1 << 0), ///< Add IR name where available
+ PrintNameAttributes = (1 << 1), ///< Print attributes
+ };
+
+ void printName(raw_ostream &os, unsigned printNameFlags = PrintNameIr,
+ ModuleSlotTracker *moduleSlotTracker = nullptr) const;
+
// Printing method used by LoopInfo.
void printAsOperand(raw_ostream &OS, bool PrintType = true) const;
@@ -932,7 +1053,7 @@
template<typename IterT>
inline IterT skipDebugInstructionsForward(IterT It, IterT End) {
while (It != End && It->isDebugInstr())
- It++;
+ ++It;
return It;
}
@@ -943,10 +1064,31 @@
template<class IterT>
inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) {
while (It != Begin && It->isDebugInstr())
- It--;
+ --It;
return It;
}
+/// Increment \p It, then continue incrementing it while it points to a debug
+/// instruction. A replacement for std::next.
+template <typename IterT> inline IterT next_nodbg(IterT It, IterT End) {
+ return skipDebugInstructionsForward(std::next(It), End);
+}
+
+/// Decrement \p It, then continue decrementing it while it points to a debug
+/// instruction. A replacement for std::prev.
+template <typename IterT> inline IterT prev_nodbg(IterT It, IterT Begin) {
+ return skipDebugInstructionsBackward(std::prev(It), Begin);
+}
+
+/// Construct a range iterator which begins at \p It and moves forwards until
+/// \p End is reached, skipping any debug instructions.
+template <typename IterT>
+inline auto instructionsWithoutDebug(IterT It, IterT End) {
+ return make_filter_range(make_range(It, End), [](const MachineInstr &MI) {
+ return !MI.isDebugInstr();
+ });
+}
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEBASICBLOCK_H
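A sketch of the new debug-skipping iteration helpers; process() is a stand-in for real per-instruction work, and MBB/It are assumed to come from the caller:

// Visit only real instructions, ignoring DBG_VALUE/DBG_LABEL.
for (MachineInstr &MI : instructionsWithoutDebug(MBB.begin(), MBB.end()))
  process(MI);

// Drop-in replacements for std::next/std::prev that skip debug instructions.
MachineBasicBlock::iterator NextReal = next_nodbg(It, MBB.end());
MachineBasicBlock::iterator PrevReal = prev_nodbg(It, MBB.begin());
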
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index a438ecf..6c442d3 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -38,6 +38,9 @@
static char ID;
MachineBlockFrequencyInfo();
+ explicit MachineBlockFrequencyInfo(MachineFunction &F,
+ MachineBranchProbabilityInfo &MBPI,
+ MachineLoopInfo &MLI);
~MachineBlockFrequencyInfo() override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -55,16 +58,33 @@
/// information. Please note that the initial frequency is equal to 1024. It
/// means that we should not rely on the value itself, but only on the
/// comparison to the other block frequencies. We do this to avoid using
/// floating points.
- ///
+ /// For example, to get the frequency of a block relative to the entry block,
+ /// divide the integral value returned by this function (the
+ /// BlockFrequency::getFrequency() value) by getEntryFreq().
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
+ /// Compute the frequency of the block, relative to the entry block.
+ /// This API assumes getEntryFreq() is non-zero.
+ float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const {
+ return getBlockFreq(MBB).getFrequency() * (1.0f / getEntryFreq());
+ }
+
Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const;
Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;
- bool isIrrLoopHeader(const MachineBasicBlock *MBB);
+ bool isIrrLoopHeader(const MachineBasicBlock *MBB) const;
+
+ /// Incrementally calculate block frequencies when we split edges, to avoid
+ /// full CFG traversal.
+ void onEdgeSplit(const MachineBasicBlock &NewPredecessor,
+ const MachineBasicBlock &NewSuccessor,
+ const MachineBranchProbabilityInfo &MBPI);
const MachineFunction *getFunction() const;
const MachineBranchProbabilityInfo *getMBPI() const;
+
+ /// Pop up a ghostview window with the current block frequency propagation
+ /// rendered using dot.
void view(const Twine &Name, bool isSimple = true) const;
// Print the block frequency Freq to OS using the current function's entry
@@ -76,6 +96,8 @@
raw_ostream &printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const;
+ /// Divide a block's BlockFrequency::getFrequency() value by this value to
+ /// obtain the entry-block-relative frequency of said block.
uint64_t getEntryFreq() const;
};
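The updated comments pin down the units: getBlockFreq() is an integer scaled so the entry block sits at getEntryFreq() (initially 1024). A sketch of the documented conversion, assuming MBFI is a computed MachineBlockFrequencyInfo and MBB a block in the same function:

  // Entry-relative frequency, as the new comments describe: divide the
  // raw integer frequency by the entry frequency.
  double Relative = double(MBFI.getBlockFreq(&MBB).getFrequency()) /
                    double(MBFI.getEntryFreq());
  // The new helper computes the same ratio in float:
  //   float R = MBFI.getBlockFreqRelativeToEntryBlock(&MBB);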
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 2b9b203..cde3bc0 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -35,10 +35,7 @@
public:
static char ID;
- MachineBranchProbabilityInfo() : ImmutablePass(ID) {
- PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeMachineBranchProbabilityInfoPass(Registry);
- }
+ MachineBranchProbabilityInfo();
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineCombinerPattern.h b/linux-x64/clang/include/llvm/CodeGen/MachineCombinerPattern.h
index 4f4034b..e9f52fb 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -25,6 +25,10 @@
REASSOC_XA_BY,
REASSOC_XA_YB,
+ // These are patterns matched by the PowerPC backend to reassociate FMA
+ // chains.
+ REASSOC_XY_AMM_BMM,
+ REASSOC_XMM_AMM_BMM,
+
// These are multiply-add patterns matched by the AArch64 machine combiner.
MULADDW_OP1,
MULADDW_OP2,
@@ -38,7 +42,56 @@
MULSUBX_OP2,
MULADDXI_OP1,
MULSUBXI_OP1,
+ // NEON integer vectors
+ MULADDv8i8_OP1,
+ MULADDv8i8_OP2,
+ MULADDv16i8_OP1,
+ MULADDv16i8_OP2,
+ MULADDv4i16_OP1,
+ MULADDv4i16_OP2,
+ MULADDv8i16_OP1,
+ MULADDv8i16_OP2,
+ MULADDv2i32_OP1,
+ MULADDv2i32_OP2,
+ MULADDv4i32_OP1,
+ MULADDv4i32_OP2,
+
+ MULSUBv8i8_OP1,
+ MULSUBv8i8_OP2,
+ MULSUBv16i8_OP1,
+ MULSUBv16i8_OP2,
+ MULSUBv4i16_OP1,
+ MULSUBv4i16_OP2,
+ MULSUBv8i16_OP1,
+ MULSUBv8i16_OP2,
+ MULSUBv2i32_OP1,
+ MULSUBv2i32_OP2,
+ MULSUBv4i32_OP1,
+ MULSUBv4i32_OP2,
+
+ MULADDv4i16_indexed_OP1,
+ MULADDv4i16_indexed_OP2,
+ MULADDv8i16_indexed_OP1,
+ MULADDv8i16_indexed_OP2,
+ MULADDv2i32_indexed_OP1,
+ MULADDv2i32_indexed_OP2,
+ MULADDv4i32_indexed_OP1,
+ MULADDv4i32_indexed_OP2,
+
+ MULSUBv4i16_indexed_OP1,
+ MULSUBv4i16_indexed_OP2,
+ MULSUBv8i16_indexed_OP1,
+ MULSUBv8i16_indexed_OP2,
+ MULSUBv2i32_indexed_OP1,
+ MULSUBv2i32_indexed_OP2,
+ MULSUBv4i32_indexed_OP1,
+ MULSUBv4i32_indexed_OP2,
+
// Floating Point
+ FMULADDH_OP1,
+ FMULADDH_OP2,
+ FMULSUBH_OP1,
+ FMULSUBH_OP2,
FMULADDS_OP1,
FMULADDS_OP2,
FMULSUBS_OP1,
@@ -47,16 +100,25 @@
FMULADDD_OP2,
FMULSUBD_OP1,
FMULSUBD_OP2,
+ FNMULSUBH_OP1,
FNMULSUBS_OP1,
FNMULSUBD_OP1,
FMLAv1i32_indexed_OP1,
FMLAv1i32_indexed_OP2,
FMLAv1i64_indexed_OP1,
FMLAv1i64_indexed_OP2,
+ FMLAv4f16_OP1,
+ FMLAv4f16_OP2,
+ FMLAv8f16_OP1,
+ FMLAv8f16_OP2,
FMLAv2f32_OP2,
FMLAv2f32_OP1,
FMLAv2f64_OP1,
FMLAv2f64_OP2,
+ FMLAv4i16_indexed_OP1,
+ FMLAv4i16_indexed_OP2,
+ FMLAv8i16_indexed_OP1,
+ FMLAv8i16_indexed_OP2,
FMLAv2i32_indexed_OP1,
FMLAv2i32_indexed_OP2,
FMLAv2i64_indexed_OP1,
@@ -67,10 +129,18 @@
FMLAv4i32_indexed_OP2,
FMLSv1i32_indexed_OP2,
FMLSv1i64_indexed_OP2,
+ FMLSv4f16_OP1,
+ FMLSv4f16_OP2,
+ FMLSv8f16_OP1,
+ FMLSv8f16_OP2,
FMLSv2f32_OP1,
FMLSv2f32_OP2,
FMLSv2f64_OP1,
FMLSv2f64_OP2,
+ FMLSv4i16_indexed_OP1,
+ FMLSv4i16_indexed_OP2,
+ FMLSv8i16_indexed_OP1,
+ FMLSv8i16_indexed_OP2,
FMLSv2i32_indexed_OP1,
FMLSv2i32_indexed_OP2,
FMLSv2i64_indexed_OP1,
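These enumerators are produced by each target's TargetInstrInfo::getMachineCombinerPatterns() hook and consumed by the MachineCombiner pass. A sketch of the producer side; MyTarget and its opcode are placeholders, not real definitions:

  bool MyTargetInstrInfo::getMachineCombinerPatterns(
      MachineInstr &Root,
      SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
    switch (Root.getOpcode()) {
    case MyTarget::FADDv4f16: // hypothetical fp16 vector add
      // Offer both operand orders; MachineCombiner picks the one that
      // shortens the critical path.
      Patterns.push_back(MachineCombinerPattern::FMLAv4f16_OP1);
      Patterns.push_back(MachineCombinerPattern::FMLAv4f16_OP2);
      return true;
    default:
      return false;
    }
  }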
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineConstantPool.h b/linux-x64/clang/include/llvm/CodeGen/MachineConstantPool.h
index 4d07b62..a9bc0ce 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineConstantPool.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineConstantPool.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Alignment.h"
#include <climits>
#include <vector>
@@ -40,12 +41,12 @@
explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {}
virtual ~MachineConstantPoolValue() = default;
- /// getType - get type of this MachineConstantPoolValue.
- ///
Type *getType() const { return Ty; }
+ virtual unsigned getSizeInBytes(const DataLayout &DL) const;
+
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) = 0;
+ Align Alignment) = 0;
virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) = 0;
@@ -71,33 +72,29 @@
MachineConstantPoolValue *MachineCPVal;
} Val;
- /// The required alignment for this entry. The top bit is set when Val is
- /// a target specific MachineConstantPoolValue.
- unsigned Alignment;
+ /// The required alignment for this entry.
+ Align Alignment;
- MachineConstantPoolEntry(const Constant *V, unsigned A)
- : Alignment(A) {
+ bool IsMachineConstantPoolEntry;
+
+ MachineConstantPoolEntry(const Constant *V, Align A)
+ : Alignment(A), IsMachineConstantPoolEntry(false) {
Val.ConstVal = V;
}
- MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A)
- : Alignment(A) {
+ MachineConstantPoolEntry(MachineConstantPoolValue *V, Align A)
+ : Alignment(A), IsMachineConstantPoolEntry(true) {
Val.MachineCPVal = V;
- Alignment |= 1U << (sizeof(unsigned) * CHAR_BIT - 1);
}
/// isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry
/// is indeed a target specific constantpool entry, not a wrapper over a
/// Constant.
- bool isMachineConstantPoolEntry() const {
- return (int)Alignment < 0;
- }
+ bool isMachineConstantPoolEntry() const { return IsMachineConstantPoolEntry; }
- int getAlignment() const {
- return Alignment & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1));
- }
+ Align getAlign() const { return Alignment; }
- Type *getType() const;
+ unsigned getSizeInBytes(const DataLayout &DL) const;
/// This method classifies the entry according to whether or not it may
/// generate a relocation entry. This must be conservative, so if it might
@@ -118,7 +115,7 @@
/// address of the function constant pool values.
/// The machine constant pool.
class MachineConstantPool {
- unsigned PoolAlignment; ///< The alignment for the pool.
+ Align PoolAlignment; ///< The alignment for the pool.
std::vector<MachineConstantPoolEntry> Constants; ///< The pool of constants.
/// MachineConstantPoolValues that use an existing MachineConstantPoolEntry.
DenseSet<MachineConstantPoolValue*> MachineCPVsSharingEntries;
@@ -132,16 +129,15 @@
: PoolAlignment(1), DL(DL) {}
~MachineConstantPool();
- /// getConstantPoolAlignment - Return the alignment required by
- /// the whole constant pool, of which the first element must be aligned.
- unsigned getConstantPoolAlignment() const { return PoolAlignment; }
+ /// Return the alignment required by the whole constant pool, of which the
+ /// first element must be aligned.
+ Align getConstantPoolAlign() const { return PoolAlignment; }
/// getConstantPoolIndex - Create a new entry in the constant pool or return
/// an existing one. User must specify the minimum required alignment for
/// the object.
- unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment);
- unsigned getConstantPoolIndex(MachineConstantPoolValue *V,
- unsigned Alignment);
+ unsigned getConstantPoolIndex(const Constant *C, Align Alignment);
+ unsigned getConstantPoolIndex(MachineConstantPoolValue *V, Align Alignment);
/// isEmpty - Return true if this constant pool contains no constants.
bool isEmpty() const { return Constants.empty(); }
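The constant pool API now takes a typed llvm::Align rather than a raw unsigned, and the sign-bit trick for marking target-specific entries is replaced by an explicit flag. A sketch of the call-site change, assuming C is a Constant* and MCP a MachineConstantPool:

  #include "llvm/Support/Alignment.h"

  unsigned Idx = MCP.getConstantPoolIndex(C, Align(8)); // was: (C, 8)
  Align PoolAlign = MCP.getConstantPoolAlign(); // was: getConstantPoolAlignment()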
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineDominanceFrontier.h b/linux-x64/clang/include/llvm/CodeGen/MachineDominanceFrontier.h
index f7bbd07..e3e6796 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineDominanceFrontier.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineDominanceFrontier.h
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/GenericDomTree.h"
-#include <vector>
namespace llvm {
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineDominators.h b/linux-x64/clang/include/llvm/CodeGen/MachineDominators.h
index d220008..46bf73c 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineDominators.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineDominators.h
@@ -23,7 +23,6 @@
#include "llvm/Support/GenericDomTreeConstruction.h"
#include <cassert>
#include <memory>
-#include <vector>
namespace llvm {
@@ -44,6 +43,8 @@
/// compute a normal dominator tree.
///
class MachineDominatorTree : public MachineFunctionPass {
+ using DomTreeT = DomTreeBase<MachineBasicBlock>;
+
/// Helper structure used to hold all the basic blocks
/// involved in the split of a critical edge.
struct CriticalEdge {
@@ -65,8 +66,8 @@
/// such as BB == elt.NewBB.
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
- /// The DominatorTreeBase that is used to compute a normal dominator tree
- std::unique_ptr<DomTreeBase<MachineBasicBlock>> DT;
+ /// The DominatorTreeBase that is used to compute a normal dominator tree.
+ std::unique_ptr<DomTreeT> DT;
/// Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
@@ -79,44 +80,39 @@
static char ID; // Pass ID, replacement for typeid
MachineDominatorTree();
+ explicit MachineDominatorTree(MachineFunction &MF) : MachineFunctionPass(ID) {
+ calculate(MF);
+ }
- DomTreeBase<MachineBasicBlock> &getBase() {
- if (!DT) DT.reset(new DomTreeBase<MachineBasicBlock>());
+ DomTreeT &getBase() {
+ if (!DT) DT.reset(new DomTreeT());
applySplitCriticalEdges();
return *DT;
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
- /// getRoots - Return the root blocks of the current CFG. This may include
- /// multiple blocks if we are computing post dominators. For forward
- /// dominators, this will always be a single block (the entry node).
- ///
- inline const SmallVectorImpl<MachineBasicBlock*> &getRoots() const {
- applySplitCriticalEdges();
- return DT->getRoots();
- }
-
- inline MachineBasicBlock *getRoot() const {
+ MachineBasicBlock *getRoot() const {
applySplitCriticalEdges();
return DT->getRoot();
}
- inline MachineDomTreeNode *getRootNode() const {
+ MachineDomTreeNode *getRootNode() const {
applySplitCriticalEdges();
return DT->getRootNode();
}
bool runOnMachineFunction(MachineFunction &F) override;
- inline bool dominates(const MachineDomTreeNode* A,
- const MachineDomTreeNode* B) const {
+ void calculate(MachineFunction &F);
+
+ bool dominates(const MachineDomTreeNode *A,
+ const MachineDomTreeNode *B) const {
applySplitCriticalEdges();
return DT->dominates(A, B);
}
- inline bool dominates(const MachineBasicBlock* A,
- const MachineBasicBlock* B) const {
+ bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
applySplitCriticalEdges();
return DT->dominates(A, B);
}
@@ -133,36 +129,30 @@
for (; &*I != A && &*I != B; ++I)
/*empty*/ ;
- //if(!DT.IsPostDominators) {
- // A dominates B if it is found first in the basic block.
- return &*I == A;
- //} else {
- // // A post-dominates B if B is found first in the basic block.
- // return &*I == B;
- //}
+ return &*I == A;
}
- inline bool properlyDominates(const MachineDomTreeNode* A,
- const MachineDomTreeNode* B) const {
+ bool properlyDominates(const MachineDomTreeNode *A,
+ const MachineDomTreeNode *B) const {
applySplitCriticalEdges();
return DT->properlyDominates(A, B);
}
- inline bool properlyDominates(const MachineBasicBlock* A,
- const MachineBasicBlock* B) const {
+ bool properlyDominates(const MachineBasicBlock *A,
+ const MachineBasicBlock *B) const {
applySplitCriticalEdges();
return DT->properlyDominates(A, B);
}
/// findNearestCommonDominator - Find nearest common dominator basic block
/// for basic block A and B. If there is no such block then return NULL.
- inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
- MachineBasicBlock *B) {
+ MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
+ MachineBasicBlock *B) {
applySplitCriticalEdges();
return DT->findNearestCommonDominator(A, B);
}
- inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+ MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
applySplitCriticalEdges();
return DT->getNode(BB);
}
@@ -170,7 +160,7 @@
/// getNode - return the (Post)DominatorTree node for the specified basic
/// block. This is the same as using operator[] on this class.
///
- inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+ MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
applySplitCriticalEdges();
return DT->getNode(BB);
}
@@ -178,8 +168,8 @@
/// addNewBlock - Add a new node to the dominator tree information. This
/// creates a new node as a child of DomBB dominator node,linking it into
/// the children list of the immediate dominator.
- inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB,
- MachineBasicBlock *DomBB) {
+ MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB) {
applySplitCriticalEdges();
return DT->addNewBlock(BB, DomBB);
}
@@ -187,14 +177,14 @@
/// changeImmediateDominator - This method is used to update the dominator
/// tree information when a node's immediate dominator changes.
///
- inline void changeImmediateDominator(MachineBasicBlock *N,
- MachineBasicBlock* NewIDom) {
+ void changeImmediateDominator(MachineBasicBlock *N,
+ MachineBasicBlock *NewIDom) {
applySplitCriticalEdges();
DT->changeImmediateDominator(N, NewIDom);
}
- inline void changeImmediateDominator(MachineDomTreeNode *N,
- MachineDomTreeNode* NewIDom) {
+ void changeImmediateDominator(MachineDomTreeNode *N,
+ MachineDomTreeNode *NewIDom) {
applySplitCriticalEdges();
DT->changeImmediateDominator(N, NewIDom);
}
@@ -202,14 +192,14 @@
/// eraseNode - Removes a node from the dominator tree. Block must not
/// dominate any other blocks. Removes node from its immediate dominator's
/// children list. Deletes dominator node associated with basic block BB.
- inline void eraseNode(MachineBasicBlock *BB) {
+ void eraseNode(MachineBasicBlock *BB) {
applySplitCriticalEdges();
DT->eraseNode(BB);
}
/// splitBlock - BB is split and now it has one successor. Update dominator
/// tree to reflect this change.
- inline void splitBlock(MachineBasicBlock* NewBB) {
+ void splitBlock(MachineBasicBlock* NewBB) {
applySplitCriticalEdges();
DT->splitBlock(NewBB);
}
@@ -270,7 +260,8 @@
template <>
struct GraphTraits<MachineDomTreeNode *>
: public MachineDomTreeGraphTraitsBase<MachineDomTreeNode,
- MachineDomTreeNode::iterator> {};
+ MachineDomTreeNode::const_iterator> {
+};
template <>
struct GraphTraits<const MachineDomTreeNode *>
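MachineDominatorTree gains an explicit constructor and a calculate() entry point, so a tree can now be built outside the legacy pass manager. A sketch, assuming MF is a fully constructed MachineFunction:

  // The explicit constructor runs calculate(MF) immediately.
  MachineDominatorTree MDT(MF);
  bool EntryDominatesLast = MDT.dominates(&MF.front(), &MF.back());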
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineFrameInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineFrameInfo.h
index 7617351..7f0ec0d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineFrameInfo.h
@@ -14,6 +14,8 @@
#define LLVM_CODEGEN_MACHINEFRAMEINFO_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
#include <vector>
@@ -30,7 +32,7 @@
/// Callee saved reg can also be saved to a different register rather than
/// on the stack by setting DstReg instead of FrameIdx.
class CalleeSavedInfo {
- unsigned Reg;
+ Register Reg;
union {
int FrameIdx;
unsigned DstReg;
@@ -57,14 +59,14 @@
: Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
// Accessors.
- unsigned getReg() const { return Reg; }
+ Register getReg() const { return Reg; }
int getFrameIdx() const { return FrameIdx; }
unsigned getDstReg() const { return DstReg; }
void setFrameIdx(int FI) {
FrameIdx = FI;
SpilledToReg = false;
}
- void setDstReg(unsigned SpillReg) {
+ void setDstReg(Register SpillReg) {
DstReg = SpillReg;
SpilledToReg = true;
}
@@ -129,7 +131,7 @@
uint64_t Size;
// The required alignment of this stack slot.
- unsigned Alignment;
+ Align Alignment;
// If true, the value of the stack object is set before
// entering the function and is not modified inside the function. By
@@ -180,17 +182,16 @@
uint8_t SSPLayout;
- StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset,
+ StackObject(uint64_t Size, Align Alignment, int64_t SPOffset,
bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca,
bool IsAliased, uint8_t StackID = 0)
- : SPOffset(SPOffset), Size(Size), Alignment(Alignment),
- isImmutable(IsImmutable), isSpillSlot(IsSpillSlot),
- StackID(StackID), Alloca(Alloca), isAliased(IsAliased),
- SSPLayout(SSPLK_None) {}
+ : SPOffset(SPOffset), Size(Size), Alignment(Alignment),
+ isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID),
+ Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {}
};
/// The alignment of the stack.
- unsigned StackAlignment;
+ Align StackAlignment;
/// Can the stack be realigned. This can be false if the target does not
/// support stack realignment, or if the user asks us not to realign the
@@ -260,7 +261,7 @@
/// native alignment maintained by the compiler, dynamic alignment code will
/// be needed.
///
- unsigned MaxAlignment = 0;
+ Align MaxAlignment;
/// Set to true if this function adjusts the stack -- e.g.,
/// when calling another function. This is only valid during and after
@@ -304,7 +305,7 @@
/// Required alignment of the local object blob, which is the strictest
/// alignment of any object in it.
- unsigned LocalFrameMaxAlign = 0;
+ Align LocalFrameMaxAlign;
/// Whether the local object blob needs to be allocated together. If not,
/// PEI should ignore the isPreAllocated flags on the stack objects and
@@ -338,8 +339,8 @@
public:
explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable,
bool ForcedRealign)
- : StackAlignment(StackAlignment), StackRealignable(StackRealignable),
- ForcedRealign(ForcedRealign) {}
+ : StackAlignment(assumeAligned(StackAlignment)),
+ StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {}
/// Return true if there are any stack objects in this function.
bool hasStackObjects() const { return !Objects.empty(); }
@@ -419,10 +420,12 @@
/// Required alignment of the local object blob,
/// which is the strictest alignment of any object in it.
- void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; }
+ void setLocalFrameMaxAlign(Align Alignment) {
+ LocalFrameMaxAlign = Alignment;
+ }
/// Return the required alignment of the local object blob.
- unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; }
+ Align getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; }
/// Get whether the local allocation blob should be allocated together or
/// let PEI allocate the locals in it directly.
@@ -458,22 +461,34 @@
Objects[ObjectIdx+NumFixedObjects].Size = Size;
}
+ LLVM_ATTRIBUTE_DEPRECATED(inline unsigned getObjectAlignment(int ObjectIdx)
+ const,
+ "Use getObjectAlign instead") {
+ return getObjectAlign(ObjectIdx).value();
+ }
+
/// Return the alignment of the specified stack object.
- unsigned getObjectAlignment(int ObjectIdx) const {
- assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+ Align getObjectAlign(int ObjectIdx) const {
+ assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
- return Objects[ObjectIdx+NumFixedObjects].Alignment;
+ return Objects[ObjectIdx + NumFixedObjects].Alignment;
}
/// setObjectAlignment - Change the alignment of the specified stack object.
- void setObjectAlignment(int ObjectIdx, unsigned Align) {
- assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+ void setObjectAlignment(int ObjectIdx, Align Alignment) {
+ assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
- Objects[ObjectIdx+NumFixedObjects].Alignment = Align;
+ Objects[ObjectIdx + NumFixedObjects].Alignment = Alignment;
// Only ensure max alignment for the default stack.
if (getStackID(ObjectIdx) == 0)
- ensureMaxAlignment(Align);
+ ensureMaxAlignment(Alignment);
+ }
+
+ LLVM_ATTRIBUTE_DEPRECATED(inline void setObjectAlignment(int ObjectIdx,
+ unsigned Align),
+ "Use the version that takes Align instead") {
+ setObjectAlignment(ObjectIdx, assumeAligned(Align));
}
/// Return the underlying Alloca of the specified
@@ -551,7 +566,7 @@
void setStackSize(uint64_t Size) { StackSize = Size; }
/// Estimate and return the size of the stack frame.
- unsigned estimateStackSize(const MachineFunction &MF) const;
+ uint64_t estimateStackSize(const MachineFunction &MF) const;
/// Return the correction for frame offsets.
int getOffsetAdjustment() const { return OffsetAdjustment; }
@@ -561,10 +576,21 @@
/// Return the alignment in bytes that this function must be aligned to,
/// which is greater than the default stack alignment provided by the target.
- unsigned getMaxAlignment() const { return MaxAlignment; }
+ LLVM_ATTRIBUTE_DEPRECATED(unsigned getMaxAlignment() const,
+ "Use getMaxAlign instead") {
+ return MaxAlignment.value();
+ }
+ /// Return the alignment in bytes that this function must be aligned to,
+ /// which is greater than the default stack alignment provided by the target.
+ Align getMaxAlign() const { return MaxAlignment; }
/// Make sure the function is at least Align bytes aligned.
- void ensureMaxAlignment(unsigned Align);
+ void ensureMaxAlignment(Align Alignment);
+
+ LLVM_ATTRIBUTE_DEPRECATED(inline void ensureMaxAlignment(unsigned Align),
+ "Use the version that uses Align instead") {
+ ensureMaxAlignment(assumeAligned(Align));
+ }
/// Return true if this function adjusts the stack -- e.g.,
/// when calling another function. This is only valid during and after
@@ -728,12 +754,26 @@
/// Create a new statically sized stack object, returning
/// a nonnegative identifier to represent it.
- int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot,
+ int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot,
const AllocaInst *Alloca = nullptr, uint8_t ID = 0);
+ LLVM_ATTRIBUTE_DEPRECATED(
+ inline int CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSpillSlot,
+ const AllocaInst *Alloca = nullptr,
+ uint8_t ID = 0),
+ "Use CreateStackObject that takes an Align instead") {
+ return CreateStackObject(Size, assumeAligned(Alignment), isSpillSlot,
+ Alloca, ID);
+ }
/// Create a new statically sized stack object that represents a spill slot,
/// returning a nonnegative identifier to represent it.
- int CreateSpillStackObject(uint64_t Size, unsigned Alignment);
+ int CreateSpillStackObject(uint64_t Size, Align Alignment);
+ LLVM_ATTRIBUTE_DEPRECATED(
+ inline int CreateSpillStackObject(uint64_t Size, unsigned Alignment),
+ "Use CreateSpillStackObject that takes an Align instead") {
+ return CreateSpillStackObject(Size, assumeAligned(Alignment));
+ }
/// Remove or mark dead a statically sized stack object.
void RemoveStackObject(int ObjectIdx) {
@@ -744,7 +784,13 @@
/// Notify the MachineFrameInfo object that a variable sized object has been
/// created. This must be created whenever a variable sized object is
/// created, whether or not the index returned is actually used.
- int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca);
+ int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca);
+ /// FIXME: Remove this function when transition to Align is over.
+ LLVM_ATTRIBUTE_DEPRECATED(int CreateVariableSizedObject(
+ unsigned Alignment, const AllocaInst *Alloca),
+ "Use the version that takes an Align instead") {
+ return CreateVariableSizedObject(assumeAligned(Alignment), Alloca);
+ }
/// Returns a reference to call saved info vector for the current function.
const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const {
@@ -755,8 +801,8 @@
/// Used by prolog/epilog inserter to set the function's callee saved
/// information.
- void setCalleeSavedInfo(const std::vector<CalleeSavedInfo> &CSI) {
- CSInfo = CSI;
+ void setCalleeSavedInfo(std::vector<CalleeSavedInfo> CSI) {
+ CSInfo = std::move(CSI);
}
/// Has the callee saved info been calculated yet?
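MachineFrameInfo follows the same unsigned-to-Align migration, keeping the old overloads behind LLVM_ATTRIBUTE_DEPRECATED shims that route through assumeAligned(). A sketch, assuming MFI is the function's MachineFrameInfo:

  // New-style calls pass a typed alignment.
  int FI = MFI.CreateStackObject(/*Size=*/16, Align(16), /*isSpillSlot=*/false);
  MFI.setObjectAlignment(FI, Align(32));
  Align MaxA = MFI.getMaxAlign(); // replaces getMaxAlignment()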
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineFunction.h b/linux-x64/clang/include/llvm/CodeGen/MachineFunction.h
index 201c126..e9979c7 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineFunction.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineFunction.h
@@ -21,9 +21,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -34,8 +32,8 @@
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Recycler.h"
+#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -48,10 +46,12 @@
class BlockAddress;
class DataLayout;
class DebugLoc;
+struct DenormalMode;
class DIExpression;
class DILocalVariable;
class DILocation;
class Function;
+class GISelChangeObserver;
class GlobalValue;
class LLVMTargetMachine;
class MachineConstantPool;
@@ -63,10 +63,12 @@
class MCContext;
class MCInstrDesc;
class MCSymbol;
+class MCSection;
class Pass;
class PseudoSourceValueManager;
class raw_ostream;
class SlotIndexes;
+class StringRef;
class TargetRegisterClass;
class TargetSubtargetInfo;
struct WasmEHFuncInfo;
@@ -142,6 +144,8 @@
// operands, this also means that all generic virtual registers have been
// constrained to virtual registers (assigned to register classes) and that
// all sizes attached to them have been eliminated.
+ // TiedOpsRewritten: The TwoAddressInstruction pass will set this flag; it
+ // means that tied defs have been rewritten to meet the RegConstraint.
enum class Property : unsigned {
IsSSA,
NoPHIs,
@@ -151,7 +155,8 @@
Legalized,
RegBankSelected,
Selected,
- LastProperty = Selected,
+ TiedOpsRewritten,
+ LastProperty = TiedOpsRewritten,
};
bool hasProperty(Property P) const {
@@ -220,7 +225,7 @@
};
class MachineFunction {
- const Function &F;
+ Function &F;
const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
MCContext &Ctx;
@@ -242,6 +247,9 @@
// Keep track of jump tables for switch instructions
MachineJumpTableInfo *JumpTableInfo;
+ // Keep track of the function section.
+ MCSection *Section = nullptr;
+
// Keeps track of Wasm exception handling related data. This will be null for
// functions that aren't using a wasm EH personality.
WasmEHFuncInfo *WasmEHInfo = nullptr;
@@ -255,6 +263,12 @@
// numbered and this vector keeps track of the mapping from ID's to MBB's.
std::vector<MachineBasicBlock*> MBBNumbering;
+ // Unary encoding of basic block symbols is used to reduce the size of ".strtab".
+ // Basic block number 'i' gets a prefix of length 'i'. The ith character also
+ // denotes the type of basic block number 'i'. Return blocks are marked with
+ // 'r', landing pads with 'l' and regular blocks with 'a'.
+ std::vector<char> BBSectionsSymbolPrefix;
+
// Pool-allocate MachineFunction-lifetime and IR objects.
BumpPtrAllocator Allocator;
@@ -277,7 +291,7 @@
unsigned FunctionNumber;
/// Alignment - The alignment of the function.
- unsigned Alignment;
+ Align Alignment;
/// ExposesReturnsTwice - True if the function calls setjmp or related
/// functions with attribute "returns twice", but doesn't have
@@ -303,6 +317,10 @@
/// by debug and exception handling consumers.
std::vector<MCCFIInstruction> FrameInstructions;
+ /// List of basic blocks immediately following calls to _setjmp. Used to
+ /// construct a table of valid longjmp targets for Windows Control Flow Guard.
+ std::vector<MCSymbol *> LongjmpTargets;
+
/// \name Exception Handling
/// \{
@@ -321,15 +339,14 @@
/// CodeView label annotations.
std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations;
- /// CodeView heapallocsites.
- std::vector<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
- CodeViewHeapAllocSites;
-
bool CallsEHReturn = false;
bool CallsUnwindInit = false;
bool HasEHScopes = false;
bool HasEHFunclets = false;
+ /// Section Type for basic blocks, only relevant with basic block sections.
+ BasicBlockSection BBSectionsType = BasicBlockSection::None;
+
/// List of C++ TypeInfo used.
std::vector<const GlobalValue *> TypeInfos;
@@ -383,9 +400,9 @@
/// For now we support only cases when argument is transferred through one
/// register.
struct ArgRegPair {
- unsigned Reg;
+ Register Reg;
uint16_t ArgNo;
- ArgRegPair(unsigned R, unsigned Arg) : Reg(R), ArgNo(Arg) {
+ ArgRegPair(Register R, unsigned Arg) : Reg(R), ArgNo(Arg) {
assert(Arg < (1 << 16) && "Arg out of range");
}
};
@@ -395,11 +412,16 @@
private:
Delegate *TheDelegate = nullptr;
+ GISelChangeObserver *Observer = nullptr;
using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
/// Map a call instruction to call site arguments forwarding info.
CallSiteInfoMap CallSitesInfo;
+ /// A helper function that returns call site info for a given call
+ /// instruction if debug entry value support is enabled.
+ CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI);
+
// Callbacks for insertion and removal.
void handleInsertion(MachineInstr &MI);
void handleRemoval(MachineInstr &MI);
@@ -409,7 +431,40 @@
using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>;
VariableDbgInfoMapTy VariableDbgInfos;
- MachineFunction(const Function &F, const LLVMTargetMachine &Target,
+ /// A count of how many instructions in the function have had numbers
+ /// assigned to them. Used for debug value tracking, to determine the
+ /// next instruction number.
+ unsigned DebugInstrNumberingCount = 0;
+
+ /// Set value of DebugInstrNumberingCount field. Avoid using this unless
+ /// you're deserializing this data.
+ void setDebugInstrNumberingCount(unsigned Num);
+
+ /// Pair of instruction number and operand number.
+ using DebugInstrOperandPair = std::pair<unsigned, unsigned>;
+
+ /// Substitution map: from one <inst,operand> pair to another. Used to
+ /// record changes in where a value is defined, so that debug variable
+ /// locations can find it later.
+ std::map<DebugInstrOperandPair, DebugInstrOperandPair>
+ DebugValueSubstitutions;
+
+ /// Create a substitution from one <instr,operand> value to a different,
+ /// new value.
+ void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair);
+
+ /// Create substitutions for any tracked values in \p Old, to point at
+ /// \p New. Needed when we re-create an instruction during optimization,
+ /// which has the same signature (i.e., def operands in the same place) but
+ /// a modified instruction type, flags, or otherwise. An example: X86 moves
+ /// are sometimes transformed into equivalent LEAs.
+ /// If the two instructions are not the same opcode, limit which operands to
+ /// examine for substitutions to the first N operands by setting
+ /// \p MaxOperand.
+ void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New,
+ unsigned MaxOperand = UINT_MAX);
+
+ MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
MachineFunction(const MachineFunction &) = delete;
@@ -439,15 +494,28 @@
TheDelegate = delegate;
}
+ void setObserver(GISelChangeObserver *O) { Observer = O; }
+
+ GISelChangeObserver *getObserver() const { return Observer; }
+
MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }
+ /// Returns the Section this function belongs to.
+ MCSection *getSection() const { return Section; }
+
+ /// Sets the Section this function belongs to.
+ void setSection(MCSection *S) { Section = S; }
+
PseudoSourceValueManager &getPSVManager() const { return *PSVManager; }
/// Return the DataLayout attached to the Module associated to this MF.
const DataLayout &getDataLayout() const;
/// Return the LLVM function that this machine code represents
+ Function &getFunction() { return F; }
+
+ /// Return the LLVM function that this machine code represents
const Function &getFunction() const { return F; }
/// getName - Return the name of the corresponding LLVM function.
@@ -456,6 +524,24 @@
/// getFunctionNumber - Return a unique ID for the current function.
unsigned getFunctionNumber() const { return FunctionNumber; }
+ /// Returns true if this function has basic block sections enabled.
+ bool hasBBSections() const {
+ return (BBSectionsType == BasicBlockSection::All ||
+ BBSectionsType == BasicBlockSection::List ||
+ BBSectionsType == BasicBlockSection::Preset);
+ }
+
+ /// Returns true if basic block labels are to be generated for this function.
+ bool hasBBLabels() const {
+ return BBSectionsType == BasicBlockSection::Labels;
+ }
+
+ void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; }
+
+ /// Assign the IsBeginSection and IsEndSection fields for basic blocks in this
+ /// function.
+ void assignBeginEndSections();
+
/// getTarget - Return the target machine this machine code is compiled with
const LLVMTargetMachine &getTarget() const { return Target; }
@@ -508,15 +594,16 @@
const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; }
WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; }
- /// getAlignment - Return the alignment (log2, not bytes) of the function.
- unsigned getAlignment() const { return Alignment; }
+ /// getAlignment - Return the alignment of the function.
+ Align getAlignment() const { return Alignment; }
- /// setAlignment - Set the alignment (log2, not bytes) of the function.
- void setAlignment(unsigned A) { Alignment = A; }
+ /// setAlignment - Set the alignment of the function.
+ void setAlignment(Align A) { Alignment = A; }
- /// ensureAlignment - Make sure the function is at least 1 << A bytes aligned.
- void ensureAlignment(unsigned A) {
- if (Alignment < A) Alignment = A;
+ /// ensureAlignment - Make sure the function is at least A bytes aligned.
+ void ensureAlignment(Align A) {
+ if (Alignment < A)
+ Alignment = A;
}
/// exposesReturnsTwice - Returns true if the function calls setjmp or
@@ -547,6 +634,9 @@
}
void setHasWinCFI(bool v) { HasWinCFI = v; }
+ /// True if this function needs frame moves for debug or exceptions.
+ bool needsFrameMoves() const;
+
/// Get the function properties
const MachineFunctionProperties &getProperties() const { return Properties; }
MachineFunctionProperties &getProperties() { return Properties; }
@@ -566,6 +656,10 @@
return const_cast<MachineFunction*>(this)->getInfo<Ty>();
}
+ /// Returns the denormal handling type for the default rounding mode of the
+ /// function.
+ DenormalMode getDenormalMode(const fltSemantics &FPType) const;
+
/// getBlockNumbered - MachineBasicBlocks are automatically numbered when they
/// are inserted into the machine function. The block number for a machine
/// basic block can be found by using the MBB::getNumber method, this method
@@ -630,7 +724,7 @@
/// addLiveIn - Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
- unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC);
+ Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC);
//===--------------------------------------------------------------------===//
// BasicBlock accessor functions.
@@ -706,7 +800,7 @@
/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
/// of `new MachineInstr'.
MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL,
- bool NoImp = false);
+ bool NoImplicit = false);
/// Create a new MachineInstr which is a copy of \p Orig, identical in all
/// ways except the instruction has no parent, prev, or next. Bundling flags
@@ -740,9 +834,8 @@
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
- unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(),
- const MDNode *Ranges = nullptr,
- SyncScope::ID SSID = SyncScope::System,
+ Align base_alignment, const AAMDNodes &AAInfo = AAMDNodes(),
+ const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System,
AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic);
@@ -753,6 +846,14 @@
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size);
+ /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
+ /// an existing one, replacing only the MachinePointerInfo and size.
+ /// MachineMemOperands are owned by the MachineFunction and need not be
+ /// explicitly deallocated.
+ MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+ MachinePointerInfo &PtrInfo,
+ uint64_t Size);
+
/// Allocate a new MachineMemOperand by copying an existing one,
/// replacing only AliasAnalysis information. MachineMemOperands are owned
/// by the MachineFunction and need not be explicitly deallocated.
@@ -783,14 +884,15 @@
/// Allocate and initialize a register mask with @p NumRegister bits.
uint32_t *allocateRegMask();
+ ArrayRef<int> allocateShuffleMask(ArrayRef<int> Mask);
+
/// Allocate and construct an extra info structure for a `MachineInstr`.
///
/// This is allocated on the function's allocator and so lives the life of
/// the function.
- MachineInstr::ExtraInfo *
- createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
- MCSymbol *PreInstrSymbol = nullptr,
- MCSymbol *PostInstrSymbol = nullptr);
+ MachineInstr::ExtraInfo *createMIExtraInfo(
+ ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol = nullptr,
+ MCSymbol *PostInstrSymbol = nullptr, MDNode *HeapAllocMarker = nullptr);
/// Allocate a string and populate it with the given external symbol name.
const char *createExternalSymbolName(StringRef Name);
@@ -817,6 +919,17 @@
LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst);
+ /// Returns a reference to a list of symbols immediately following calls to
+ /// _setjmp in the function. Used to construct the longjmp target table used
+ /// by Windows Control Flow Guard.
+ const std::vector<MCSymbol *> &getLongjmpTargets() const {
+ return LongjmpTargets;
+ }
+
+ /// Add the specified symbol to the list of valid longjmp targets for Windows
+ /// Control Flow Guard.
+ void addLongjmpTarget(MCSymbol *Target) { LongjmpTargets.push_back(Target); }
+
/// \name Exception Handling
/// \{
@@ -934,14 +1047,6 @@
return CodeViewAnnotations;
}
- /// Record heapallocsites
- void addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD);
-
- ArrayRef<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
- getCodeViewHeapAllocSites() const {
- return CodeViewHeapAllocSites;
- }
-
/// Return a reference to the C++ typeinfo for the current function.
const std::vector<const GlobalValue *> &getTypeInfos() const {
return TypeInfos;
@@ -966,22 +1071,45 @@
return VariableDbgInfos;
}
+ /// Start tracking the arguments passed to the call \p CallI.
void addCallArgsForwardingRegs(const MachineInstr *CallI,
CallSiteInfoImpl &&CallInfo) {
- assert(CallI->isCall());
- CallSitesInfo[CallI] = std::move(CallInfo);
+ assert(CallI->isCandidateForCallSiteEntry());
+ bool Inserted =
+ CallSitesInfo.try_emplace(CallI, std::move(CallInfo)).second;
+ (void)Inserted;
+ assert(Inserted && "Call site info not unique");
}
const CallSiteInfoMap &getCallSitesInfo() const {
return CallSitesInfo;
}
- /// Update call sites info by deleting entry for \p Old call instruction.
- /// If \p New is present then transfer \p Old call info to it. This function
- /// should be called before removing call instruction or before replacing
- /// call instruction with new one.
- void updateCallSiteInfo(const MachineInstr *Old,
- const MachineInstr *New = nullptr);
+ /// Following functions update call site info. They should be called before
+ /// removing, replacing or copying call instruction.
+
+ /// Erase the call site info for \p MI. It is used to remove a call
+ /// instruction from the instruction stream.
+ void eraseCallSiteInfo(const MachineInstr *MI);
+ /// Copy the call site info from \p Old to \p New. Used when making a copy
+ /// of the instruction that will be inserted at a different point of the
+ /// instruction stream.
+ void copyCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New);
+
+ const std::vector<char> &getBBSectionsSymbolPrefix() const {
+ return BBSectionsSymbolPrefix;
+ }
+
+ /// Move the call site info from \p Old to \p New. This function is used
+ /// when we are replacing one call instruction with another one to the
+ /// same callee.
+ void moveCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New);
+
+ unsigned getNewDebugInstrNum() {
+ return ++DebugInstrNumberingCount;
+ }
};
//===--------------------------------------------------------------------===//
@@ -1048,6 +1176,11 @@
}
};
+class MachineFunctionAnalysisManager;
+void verifyMachineFunction(MachineFunctionAnalysisManager *,
+ const std::string &Banner,
+ const MachineFunction &MF);
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEFUNCTION_H
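Two of the MachineFunction additions are easiest to see side by side: the debug-instruction numbering counter and the split of updateCallSiteInfo() into three verbs. A sketch, assuming MF is a MachineFunction and Old/New are call instructions with the same callee:

  unsigned Num = MF.getNewDebugInstrNum(); // fresh, function-unique number

  // Pick the updater that matches the transformation being performed:
  MF.moveCallSiteInfo(Old, New);   // New replaces Old in the stream
  // MF.copyCallSiteInfo(Old, New); // New is a duplicate inserted elsewhere
  // MF.eraseCallSiteInfo(Old);     // Old is simply removed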
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineInstr.h b/linux-x64/clang/include/llvm/CodeGen/MachineInstr.h
index c82c5b1..6bbe2d0 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineInstr.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineInstr.h
@@ -20,11 +20,9 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -38,12 +36,12 @@
namespace llvm {
+class AAResults;
template <typename T> class ArrayRef;
class DIExpression;
class DILocalVariable;
class MachineBasicBlock;
class MachineFunction;
-class MachineMemOperand;
class MachineRegisterInfo;
class ModuleSlotTracker;
class raw_ostream;
@@ -105,8 +103,11 @@
// no signed wrap.
IsExact = 1 << 13, // Instruction's division is known
// to be exact.
- FPExcept = 1 << 14, // Instruction may raise floating-point
- // exceptions.
+ NoFPExcept = 1 << 14, // Instruction does not raise
+ // floating-point exceptions.
+ NoMerge = 1 << 15, // Passes that drop source location info
+ // (e.g. branch folding) should skip
+ // this instruction.
};
private:
@@ -116,8 +117,6 @@
// Operands are allocated by an ArrayRecycler.
MachineOperand *Operands = nullptr; // Pointer to the first operand.
unsigned NumOperands = 0; // Number of operands on instruction.
- using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
- OperandCapacity CapOperands; // Capacity of the Operands array.
uint16_t Flags = 0; // Various bits of additional
// information about machine
@@ -130,6 +129,11 @@
// anything other than to convey comment
// information to AsmPrinter.
+ // OperandCapacity has uint8_t size, so it should be next to AsmPrinterFlags
+ // to properly pack.
+ using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
+ OperandCapacity CapOperands; // Capacity of the Operands array.
+
/// Internal implementation detail class that provides out-of-line storage for
/// extra info used by the machine instruction when this info cannot be stored
/// in-line within the instruction itself.
@@ -137,19 +141,23 @@
/// This has to be defined eagerly due to the implementation constraints of
/// `PointerSumType` where it is used.
class ExtraInfo final
- : TrailingObjects<ExtraInfo, MachineMemOperand *, MCSymbol *> {
+ : TrailingObjects<ExtraInfo, MachineMemOperand *, MCSymbol *, MDNode *> {
public:
static ExtraInfo *create(BumpPtrAllocator &Allocator,
ArrayRef<MachineMemOperand *> MMOs,
MCSymbol *PreInstrSymbol = nullptr,
- MCSymbol *PostInstrSymbol = nullptr) {
+ MCSymbol *PostInstrSymbol = nullptr,
+ MDNode *HeapAllocMarker = nullptr) {
bool HasPreInstrSymbol = PreInstrSymbol != nullptr;
bool HasPostInstrSymbol = PostInstrSymbol != nullptr;
+ bool HasHeapAllocMarker = HeapAllocMarker != nullptr;
auto *Result = new (Allocator.Allocate(
- totalSizeToAlloc<MachineMemOperand *, MCSymbol *>(
- MMOs.size(), HasPreInstrSymbol + HasPostInstrSymbol),
+ totalSizeToAlloc<MachineMemOperand *, MCSymbol *, MDNode *>(
+ MMOs.size(), HasPreInstrSymbol + HasPostInstrSymbol,
+ HasHeapAllocMarker),
alignof(ExtraInfo)))
- ExtraInfo(MMOs.size(), HasPreInstrSymbol, HasPostInstrSymbol);
+ ExtraInfo(MMOs.size(), HasPreInstrSymbol, HasPostInstrSymbol,
+ HasHeapAllocMarker);
// Copy the actual data into the trailing objects.
std::copy(MMOs.begin(), MMOs.end(),
@@ -160,6 +168,8 @@
if (HasPostInstrSymbol)
Result->getTrailingObjects<MCSymbol *>()[HasPreInstrSymbol] =
PostInstrSymbol;
+ if (HasHeapAllocMarker)
+ Result->getTrailingObjects<MDNode *>()[0] = HeapAllocMarker;
return Result;
}
@@ -178,6 +188,10 @@
: nullptr;
}
+ MDNode *getHeapAllocMarker() const {
+ return HasHeapAllocMarker ? getTrailingObjects<MDNode *>()[0] : nullptr;
+ }
+
private:
friend TrailingObjects;
@@ -189,6 +203,7 @@
const int NumMMOs;
const bool HasPreInstrSymbol;
const bool HasPostInstrSymbol;
+ const bool HasHeapAllocMarker;
// Implement the `TrailingObjects` internal API.
size_t numTrailingObjects(OverloadToken<MachineMemOperand *>) const {
@@ -197,12 +212,17 @@
size_t numTrailingObjects(OverloadToken<MCSymbol *>) const {
return HasPreInstrSymbol + HasPostInstrSymbol;
}
+ size_t numTrailingObjects(OverloadToken<MDNode *>) const {
+ return HasHeapAllocMarker;
+ }
// Just a boring constructor to allow us to initialize the sizes. Always use
// the `create` routine above.
- ExtraInfo(int NumMMOs, bool HasPreInstrSymbol, bool HasPostInstrSymbol)
+ ExtraInfo(int NumMMOs, bool HasPreInstrSymbol, bool HasPostInstrSymbol,
+ bool HasHeapAllocMarker)
: NumMMOs(NumMMOs), HasPreInstrSymbol(HasPreInstrSymbol),
- HasPostInstrSymbol(HasPostInstrSymbol) {}
+ HasPostInstrSymbol(HasPostInstrSymbol),
+ HasHeapAllocMarker(HasHeapAllocMarker) {}
};
/// Enumeration of the kinds of inline extra info available. It is important
@@ -229,6 +249,10 @@
DebugLoc debugLoc; // Source line information.
+ /// Unique instruction number. Used by DBG_INSTR_REFs to refer to the values
+ /// defined by this instruction.
+ unsigned DebugInstrNum;
+
// Intrusive list support
friend struct ilist_traits<MachineInstr>;
friend struct ilist_callback_traits<MachineBasicBlock>;
@@ -247,6 +271,10 @@
// MachineInstrs are pool-allocated and owned by MachineFunction.
friend class MachineFunction;
+ void
+ dumprImpl(const MachineRegisterInfo &MRI, unsigned Depth, unsigned MaxDepth,
+ SmallPtrSetImpl<const MachineInstr *> &AlreadySeenInstrs) const;
+
public:
MachineInstr(const MachineInstr &) = delete;
MachineInstr &operator=(const MachineInstr &) = delete;
@@ -256,6 +284,9 @@
const MachineBasicBlock* getParent() const { return Parent; }
MachineBasicBlock* getParent() { return Parent; }
+ /// Move the instruction before \p MovePos.
+ void moveBefore(MachineInstr *MovePos);
+
/// Return the function that contains the basic block that this instruction
/// belongs to.
///
@@ -384,10 +415,31 @@
/// Returns the debug location id of this MachineInstr.
const DebugLoc &getDebugLoc() const { return debugLoc; }
+ /// Return the operand containing the offset to be used if this DBG_VALUE
+ /// instruction is indirect; will be an invalid register if this value is
+ /// not indirect, and an immediate with value 0 otherwise.
+ const MachineOperand &getDebugOffset() const {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return getOperand(1);
+ }
+ MachineOperand &getDebugOffset() {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return getOperand(1);
+ }
+
+ /// Return the operand for the debug variable referenced by
+ /// this DBG_VALUE instruction.
+ const MachineOperand &getDebugVariableOp() const;
+ MachineOperand &getDebugVariableOp();
+
/// Return the debug variable referenced by
/// this DBG_VALUE instruction.
const DILocalVariable *getDebugVariable() const;
+ /// Return the operand for the complex address expression referenced by
+ /// this DBG_VALUE instruction.
+ MachineOperand &getDebugExpressionOp();
+
/// Return the complex address expression referenced by
/// this DBG_VALUE instruction.
const DIExpression *getDebugExpression() const;
@@ -396,6 +448,18 @@
/// this DBG_LABEL instruction.
const DILabel *getDebugLabel() const;
+ /// Fetch the instruction number of this MachineInstr. If it does not have
+ /// one already, a new and unique number will be assigned.
+ unsigned getDebugInstrNum();
+
+ /// Examine the instruction number of this MachineInstr. May be zero if
+ /// it hasn't been assigned a number yet.
+ unsigned peekDebugInstrNum() const { return DebugInstrNum; }
+
+ /// Set instruction number of this MachineInstr. Avoid using unless you're
+ /// deserializing this information.
+ void setDebugInstrNum(unsigned Num) { DebugInstrNum = Num; }
+
/// Emit an error referring to the source location of this instruction.
/// This should only be used for inline assembly that is somehow
/// impossible to compile. Other errors should have been handled much
@@ -413,6 +477,11 @@
/// Returns the total number of operands.
unsigned getNumOperands() const { return NumOperands; }
+ /// Returns the total number of operands which are debug locations.
+ unsigned getNumDebugOperands() const {
+ return std::distance(debug_operands().begin(), debug_operands().end());
+ }
+
const MachineOperand& getOperand(unsigned i) const {
assert(i < getNumOperands() && "getOperand() out of range!");
return Operands[i];
@@ -422,11 +491,59 @@
return Operands[i];
}
+ MachineOperand &getDebugOperand(unsigned Index) {
+ assert(Index < getNumDebugOperands() && "getDebugOperand() out of range!");
+ return *(debug_operands().begin() + Index);
+ }
+ const MachineOperand &getDebugOperand(unsigned Index) const {
+ assert(Index < getNumDebugOperands() && "getDebugOperand() out of range!");
+ return *(debug_operands().begin() + Index);
+ }
+
+ /// Returns a pointer to the operand corresponding to a debug use of Reg, or
+ /// nullptr if Reg is not used in any debug operand.
+ const MachineOperand *getDebugOperandForReg(Register Reg) const {
+ const MachineOperand *RegOp =
+ find_if(debug_operands(), [Reg](const MachineOperand &Op) {
+ return Op.isReg() && Op.getReg() == Reg;
+ });
+ return RegOp == adl_end(debug_operands()) ? nullptr : RegOp;
+ }
+ MachineOperand *getDebugOperandForReg(Register Reg) {
+ MachineOperand *RegOp =
+ find_if(debug_operands(), [Reg](const MachineOperand &Op) {
+ return Op.isReg() && Op.getReg() == Reg;
+ });
+ return RegOp == adl_end(debug_operands()) ? nullptr : RegOp;
+ }
+
+ unsigned getDebugOperandIndex(const MachineOperand *Op) const {
+ assert(Op >= adl_begin(debug_operands()) &&
+ Op <= adl_end(debug_operands()) && "Expected a debug operand.");
+ return std::distance(adl_begin(debug_operands()), Op);
+ }
+
/// Returns the total number of definitions.
unsigned getNumDefs() const {
return getNumExplicitDefs() + MCID->getNumImplicitDefs();
}
+ /// Returns true if the instruction has an implicit definition.
+ bool hasImplicitDef() const {
+ for (unsigned I = getNumExplicitOperands(), E = getNumOperands();
+ I != E; ++I) {
+ const MachineOperand &MO = getOperand(I);
+ if (MO.isDef() && MO.isImplicit())
+ return true;
+ }
+ return false;
+ }
+
+ /// Returns the number of implicit operands.
+ unsigned getNumImplicitOperands() const {
+ return getNumOperands() - getNumExplicitOperands();
+ }
+
/// Return true if operand \p OpIdx is a subregister index.
bool isOperandSubregIdx(unsigned OpIdx) const {
assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&
@@ -478,6 +595,17 @@
iterator_range<const_mop_iterator> implicit_operands() const {
return make_range(explicit_operands().end(), operands_end());
}
+ /// Returns a range over all operands that are used to determine the variable
+ /// location for this DBG_VALUE instruction.
+ iterator_range<mop_iterator> debug_operands() {
+ assert(isDebugValue() && "Must be a debug value instruction.");
+ return make_range(operands_begin(), operands_begin() + 1);
+ }
+ /// \copydoc debug_operands()
+ iterator_range<const_mop_iterator> debug_operands() const {
+ assert(isDebugValue() && "Must be a debug value instruction.");
+ return make_range(operands_begin(), operands_begin() + 1);
+ }
/// Returns a range over all explicit operands that are register definitions.
/// Implicit definition are not included!
iterator_range<mop_iterator> defs() {
@@ -577,6 +705,16 @@
return nullptr;
}
+ /// Helper to extract a heap alloc marker if one has been added.
+ MDNode *getHeapAllocMarker() const {
+ if (!Info)
+ return nullptr;
+ if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>())
+ return EI->getHeapAllocMarker();
+
+ return nullptr;
+ }
+
/// API for querying MachineInstr properties. They are the same as MCInstrDesc
/// queries but they are bundle aware.
@@ -602,6 +740,12 @@
return hasPropertyInBundle(1ULL << MCFlag, Type);
}
+ /// Return true if this is an instruction that should go through the usual
+ /// legalization steps.
+ bool isPreISelOpcode(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::PreISelOpcode, Type);
+ }
+
/// Return true if this instruction can have a variable number of operands.
/// In this case, the variable operands will be after the normal
/// operands but before the implicit definitions and uses (if any are
@@ -636,6 +780,14 @@
return hasProperty(MCID::Call, Type);
}
+ /// Return true if this is a call instruction that may have an associated
+ /// call site entry in the debug info.
+ bool isCandidateForCallSiteEntry(QueryType Type = IgnoreBundle) const;
+ /// Return true if copying, moving, or erasing this instruction requires
+ /// updating Call Site Info (see \ref copyCallSiteInfo, \ref moveCallSiteInfo,
+ /// \ref eraseCallSiteInfo).
+ bool shouldUpdateCallSiteInfo() const;
+
/// Returns true if the specified instruction stops control flow
/// from executing the instruction immediately following it. Examples include
/// unconditional branches and return instructions.
@@ -654,7 +806,7 @@
/// Returns true if this is a conditional, unconditional, or indirect branch.
/// Predicates below can be used to discriminate between
- /// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to
+ /// these cases, and the TargetInstrInfo::analyzeBranch method can be used to
/// get more information.
bool isBranch(QueryType Type = AnyInBundle) const {
return hasProperty(MCID::Branch, Type);
@@ -668,18 +820,18 @@
/// Return true if this is a branch which may fall
/// through to the next instruction or may transfer control flow to some other
- /// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more
+ /// block. The TargetInstrInfo::analyzeBranch method can be used to get more
/// information about this branch.
bool isConditionalBranch(QueryType Type = AnyInBundle) const {
- return isBranch(Type) & !isBarrier(Type) & !isIndirectBranch(Type);
+ return isBranch(Type) && !isBarrier(Type) && !isIndirectBranch(Type);
}
/// Return true if this is a branch which always
/// transfers control flow to some other block. The
- /// TargetInstrInfo::AnalyzeBranch method can be used to get more information
+ /// TargetInstrInfo::analyzeBranch method can be used to get more information
/// about this branch.
bool isUnconditionalBranch(QueryType Type = AnyInBundle) const {
- return isBranch(Type) & isBarrier(Type) & !isIndirectBranch(Type);
+ return isBranch(Type) && isBarrier(Type) && !isIndirectBranch(Type);
}
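A short editorial sketch of how the predicates above partition branches; for richer structure, TargetInstrInfo::analyzeBranch is the intended entry point:

// Classifies a terminator using only the bundle-aware predicates.
static const char *classifyBranch(const llvm::MachineInstr &MI) {
  if (!MI.isBranch())
    return "non-branch";
  if (MI.isIndirectBranch())
    return "indirect";
  return MI.isUnconditionalBranch() ? "unconditional" : "conditional";
}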
/// Return true if this instruction has a predicate operand that
@@ -838,10 +990,10 @@
/// instruction that can in principle raise an exception, as indicated
/// by the MCID::MayRaiseFPException property, *and* at the same time,
/// the instruction is used in a context where we expect floating-point
- /// exceptions might be enabled, as indicated by the FPExcept MI flag.
+ /// exceptions are not disabled, as indicated by the NoFPExcept MI flag.
bool mayRaiseFPException() const {
return hasProperty(MCID::MayRaiseFPException) &&
- getFlag(MachineInstr::MIFlag::FPExcept);
+ !getFlag(MachineInstr::MIFlag::NoFPExcept);
}
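The polarity change above means an instruction with the MCID property now defaults to "may raise"; a context that knows FP exceptions are irrelevant opts out explicitly. A minimal sketch (editorial):

// After this call, mayRaiseFPException() returns false for MI.
static void relaxFPExceptions(llvm::MachineInstr &MI) {
  MI.setFlag(llvm::MachineInstr::NoFPExcept);
}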
//===--------------------------------------------------------------------===//
@@ -1009,26 +1161,28 @@
bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; }
bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; }
- bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); }
+ bool isDebugRef() const { return getOpcode() == TargetOpcode::DBG_INSTR_REF; }
+ bool isDebugInstr() const {
+ return isDebugValue() || isDebugLabel() || isDebugRef();
+ }
- /// A DBG_VALUE is indirect iff the first operand is a register and
- /// the second operand is an immediate.
+ bool isDebugOffsetImm() const { return getDebugOffset().isImm(); }
+
+ /// A DBG_VALUE is indirect iff the location operand is a register and
+ /// the offset operand is an immediate.
bool isIndirectDebugValue() const {
- return isDebugValue()
- && getOperand(0).isReg()
- && getOperand(1).isImm();
+ return isDebugValue() && getDebugOperand(0).isReg() && isDebugOffsetImm();
}
/// A DBG_VALUE is an entry value iff its debug expression contains the
- /// DW_OP_entry_value DWARF operation.
- bool isDebugEntryValue() const {
- return isDebugValue() && getDebugExpression()->isEntryValue();
- }
+ /// DW_OP_LLVM_entry_value operation.
+ bool isDebugEntryValue() const;
/// Return true if the instruction is a debug value which describes a part of
/// a variable as unavailable.
bool isUndefDebugValue() const {
- return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg();
+ return isDebugValue() && getDebugOperand(0).isReg() &&
+ !getDebugOperand(0).getReg().isValid();
}
bool isPHI() const {
@@ -1103,9 +1257,11 @@
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
case TargetOpcode::DBG_VALUE:
+ case TargetOpcode::DBG_INSTR_REF:
case TargetOpcode::DBG_LABEL:
case TargetOpcode::LIFETIME_START:
case TargetOpcode::LIFETIME_END:
+ case TargetOpcode::PSEUDO_PROBE:
return true;
}
}
@@ -1140,7 +1296,7 @@
/// is a read of a super-register.
/// This does not count partial redefines of virtual registers as reads:
/// %reg1024:6 = OP.
- bool readsRegister(unsigned Reg,
+ bool readsRegister(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterUseOperandIdx(Reg, false, TRI) != -1;
}
@@ -1148,20 +1304,20 @@
/// Return true if the MachineInstr reads the specified virtual register.
/// Take into account that a partial define is a
/// read-modify-write operation.
- bool readsVirtualRegister(unsigned Reg) const {
+ bool readsVirtualRegister(Register Reg) const {
return readsWritesVirtualRegister(Reg).first;
}
/// Return a pair of bools (reads, writes) indicating if this instruction
/// reads or writes Reg. This also considers partial defines.
/// If Ops is not null, all operand indices for Reg are added.
- std::pair<bool,bool> readsWritesVirtualRegister(unsigned Reg,
+ std::pair<bool,bool> readsWritesVirtualRegister(Register Reg,
SmallVectorImpl<unsigned> *Ops = nullptr) const;
/// Return true if the MachineInstr kills the specified register.
/// If TargetRegisterInfo is passed, then it also checks if there is
/// a kill of a super-register.
- bool killsRegister(unsigned Reg,
+ bool killsRegister(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterUseOperandIdx(Reg, true, TRI) != -1;
}
@@ -1170,7 +1326,7 @@
/// If TargetRegisterInfo is passed, then it also checks
/// if there is a def of a super-register.
/// NOTE: This ignores subreg indices on virtual registers.
- bool definesRegister(unsigned Reg,
+ bool definesRegister(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1;
}
@@ -1178,38 +1334,39 @@
/// Return true if the MachineInstr modifies (fully define or partially
/// define) the specified register.
/// NOTE: This ignores subreg indices on virtual registers.
- bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const {
+ bool modifiesRegister(Register Reg,
+ const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1;
}
/// Returns true if the register is dead in this machine instruction.
/// If TargetRegisterInfo is passed, then it also checks
/// if there is a dead def of a super-register.
- bool registerDefIsDead(unsigned Reg,
+ bool registerDefIsDead(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1;
}
/// Returns true if the MachineInstr has an implicit-use operand of exactly
/// the given register (not considering sub/super-registers).
- bool hasRegisterImplicitUseOperand(unsigned Reg) const;
+ bool hasRegisterImplicitUseOperand(Register Reg) const;
/// Returns the operand index that is a use of the specific register or -1
/// if it is not found. It further tightens the search criteria to a use
/// that kills the register if isKill is true.
- int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false,
+ int findRegisterUseOperandIdx(Register Reg, bool isKill = false,
const TargetRegisterInfo *TRI = nullptr) const;
/// Wrapper for findRegisterUseOperandIdx, it returns
/// a pointer to the MachineOperand rather than an index.
- MachineOperand *findRegisterUseOperand(unsigned Reg, bool isKill = false,
+ MachineOperand *findRegisterUseOperand(Register Reg, bool isKill = false,
const TargetRegisterInfo *TRI = nullptr) {
int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI);
return (Idx == -1) ? nullptr : &getOperand(Idx);
}
const MachineOperand *findRegisterUseOperand(
- unsigned Reg, bool isKill = false,
+ Register Reg, bool isKill = false,
const TargetRegisterInfo *TRI = nullptr) const {
return const_cast<MachineInstr *>(this)->
findRegisterUseOperand(Reg, isKill, TRI);
@@ -1221,14 +1378,14 @@
/// overlap the specified register. If TargetRegisterInfo is non-null,
/// then it also checks if there is a def of a super-register.
/// This may also return a register mask operand when Overlap is true.
- int findRegisterDefOperandIdx(unsigned Reg,
+ int findRegisterDefOperandIdx(Register Reg,
bool isDead = false, bool Overlap = false,
const TargetRegisterInfo *TRI = nullptr) const;
/// Wrapper for findRegisterDefOperandIdx, it returns
/// a pointer to the MachineOperand rather than an index.
MachineOperand *
- findRegisterDefOperand(unsigned Reg, bool isDead = false,
+ findRegisterDefOperand(Register Reg, bool isDead = false,
bool Overlap = false,
const TargetRegisterInfo *TRI = nullptr) {
int Idx = findRegisterDefOperandIdx(Reg, isDead, Overlap, TRI);
@@ -1236,7 +1393,7 @@
}
const MachineOperand *
- findRegisterDefOperand(unsigned Reg, bool isDead = false,
+ findRegisterDefOperand(Register Reg, bool isDead = false,
bool Overlap = false,
const TargetRegisterInfo *TRI = nullptr) const {
return const_cast<MachineInstr *>(this)->findRegisterDefOperand(
@@ -1283,7 +1440,7 @@
///
/// \pre CurRC must not be NULL.
const TargetRegisterClass *getRegClassConstraintEffectForVReg(
- unsigned Reg, const TargetRegisterClass *CurRC,
+ Register Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
bool ExploreBundle = false) const;
@@ -1346,39 +1503,39 @@
/// Replace all occurrences of FromReg with ToReg:SubIdx,
/// properly composing subreg indices where necessary.
- void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx,
+ void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx,
const TargetRegisterInfo &RegInfo);
/// We have determined MI kills a register. Look for the
/// operand that uses it and mark it as IsKill. If AddIfNotFound is true,
/// add an implicit operand if it's not found. Returns true if the operand
/// exists / is added.
- bool addRegisterKilled(unsigned IncomingReg,
+ bool addRegisterKilled(Register IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound = false);
/// Clear all kill flags affecting Reg. If RegInfo is provided, this includes
/// all aliasing registers.
- void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo);
+ void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo);
/// We have determined MI defined a register without a use.
/// Look for the operand that defines it and mark it as IsDead. If
/// AddIfNotFound is true, add an implicit operand if it's not found. Returns
/// true if the operand exists / is added.
- bool addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo,
+ bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo,
bool AddIfNotFound = false);
/// Clear all dead flags on operands defining register @p Reg.
- void clearRegisterDeads(unsigned Reg);
+ void clearRegisterDeads(Register Reg);
/// Mark all subregister defs of register @p Reg with the undef flag.
/// This function is used when we have determined that there is a subregister
/// def in an otherwise undefined super register.
- void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true);
+ void setRegisterDefReadUndef(Register Reg, bool IsUndef = true);
/// We have determined MI defines a register. Make sure there is an operand
/// defining Reg.
- void addRegisterDefined(unsigned Reg,
+ void addRegisterDefined(Register Reg,
const TargetRegisterInfo *RegInfo = nullptr);
/// Mark every physreg used by this instruction as
@@ -1386,13 +1543,13 @@
///
/// On instructions with register mask operands, also add implicit-def
/// operands for all registers in UsedRegs.
- void setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
+ void setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
const TargetRegisterInfo &TRI);
/// Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
- bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const;
+ bool isSafeToMove(AAResults *AA, bool &SawStore) const;
/// Returns true if this instruction's memory access aliases the memory
/// access of Other.
@@ -1404,7 +1561,7 @@
/// @param AA Optional alias analysis, used to compare memory operands.
/// @param Other MachineInstr to check aliasing against.
/// @param UseTBAA Whether to pass TBAA information to alias analysis.
- bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const;
+ bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const;
/// Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
@@ -1419,7 +1576,7 @@
/// argument area of a function (if it does not change). If the instruction
/// does multiple loads, this returns true only if all of the loads are
/// dereferenceable and invariant.
- bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const;
+ bool isDereferenceableInvariantLoad(AAResults *AA) const;
/// If the specified instruction is a PHI that always merges together the
/// same virtual register, return the register, otherwise return 0.
@@ -1485,6 +1642,10 @@
bool AddNewLine = true,
const TargetInstrInfo *TII = nullptr) const;
void dump() const;
+ /// Print on dbgs() the current instruction and the instructions defining its
+ /// operands and so on until we reach \p MaxDepth.
+ void dumpr(const MachineRegisterInfo &MRI,
+ unsigned MaxDepth = UINT_MAX) const;
/// @}
//===--------------------------------------------------------------------===//
@@ -1578,6 +1739,12 @@
/// replace ours with it.
void cloneInstrSymbols(MachineFunction &MF, const MachineInstr &MI);
+ /// Set a marker on instructions that denotes where we should create and emit
+ /// heap alloc site labels. This waits until after instruction selection and
+ /// optimizations to create the label, so it should still work if the
+ /// instruction is removed or duplicated.
+ void setHeapAllocMarker(MachineFunction &MF, MDNode *MD);
+
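An editorial sketch pairing the new setter with the getHeapAllocMarker() accessor added earlier in this file; MD is assumed to be a !heapallocsite metadata node supplied by the front end:

#include <cassert>

static void tagHeapAllocSite(llvm::MachineFunction &MF, llvm::MachineInstr &MI,
                             llvm::MDNode *MD) {
  MI.setHeapAllocMarker(MF, MD);  // the label itself is emitted later
  assert(MI.getHeapAllocMarker() == MD && "marker should round-trip");
}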
/// Return the MIFlags which represent both MachineInstrs. This
/// should be used when merging two MachineInstrs into one. This routine does
/// not modify the MIFlags of this MachineInstr.
@@ -1600,12 +1767,31 @@
/// Add all implicit def and use operands to this instruction.
void addImplicitDefUseOperands(MachineFunction &MF);
- /// Scan instructions following MI and collect any matching DBG_VALUEs.
+ /// Scan instructions immediately following MI and collect any matching
+ /// DBG_VALUEs.
void collectDebugValues(SmallVectorImpl<MachineInstr *> &DbgValues);
- /// Find all DBG_VALUEs immediately following this instruction that point
- /// to a register def in this instruction and point them to \p Reg instead.
- void changeDebugValuesDefReg(unsigned Reg);
+ /// Find all DBG_VALUEs that point to the register def in this instruction
+ /// and point them to \p Reg instead.
+ void changeDebugValuesDefReg(Register Reg);
+
+ /// Returns the Intrinsic::ID for this instruction.
+ /// \pre Must have an intrinsic ID operand.
+ unsigned getIntrinsicID() const {
+ return getOperand(getNumExplicitDefs()).getIntrinsicID();
+ }
+
+ /// Sets all register debug operands in this debug value instruction to be
+ /// undef.
+ void setDebugValueUndef() {
+ assert(isDebugValue() && "Must be a debug value instruction.");
+ for (MachineOperand &MO : debug_operands()) {
+ if (MO.isReg()) {
+ MO.setReg(0);
+ MO.setSubReg(0);
+ }
+ }
+ }
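A sketch of the intended use (editorial): when an instruction is erased, DBG_VALUEs that referred to its def can be neutralized rather than left pointing at a dead vreg:

#include "llvm/ADT/ArrayRef.h"

static void undefDbgUsers(llvm::ArrayRef<llvm::MachineInstr *> DbgUsers) {
  for (llvm::MachineInstr *DbgMI : DbgUsers)
    DbgMI->setDebugValueUndef(); // registers become $noreg
}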
private:
/// If this instruction is embedded into a MachineFunction, return the
@@ -1630,8 +1816,14 @@
/// this MI and the given operand index \p OpIdx.
/// If the related operand does not constrain Reg, this returns CurRC.
const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl(
- unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
+ unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const;
+
+ /// Stores extra instruction information inline or allocates as ExtraInfo
+ /// based on the number of pointers.
+ void setExtraInfo(MachineFunction &MF, ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol,
+ MDNode *HeapAllocMarker);
};
/// Special DenseMapInfo traits to compare MachineInstr* by *value* of the
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineInstrBuilder.h b/linux-x64/clang/include/llvm/CodeGen/MachineInstrBuilder.h
index 6d7fb72..115c501 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -40,20 +40,30 @@
namespace RegState {
- enum {
- Define = 0x2,
- Implicit = 0x4,
- Kill = 0x8,
- Dead = 0x10,
- Undef = 0x20,
- EarlyClobber = 0x40,
- Debug = 0x80,
- InternalRead = 0x100,
- Renamable = 0x200,
- DefineNoRead = Define | Undef,
- ImplicitDefine = Implicit | Define,
- ImplicitKill = Implicit | Kill
- };
+enum {
+ /// Register definition.
+ Define = 0x2,
+ /// Register not explicitly emitted (e.g. carry, or temporary result).
+ Implicit = 0x4,
+ /// The last use of a register.
+ Kill = 0x8,
+ /// Unused definition.
+ Dead = 0x10,
+ /// Value of the register doesn't matter.
+ Undef = 0x20,
+ /// Register definition happens before uses.
+ EarlyClobber = 0x40,
+ /// Register 'use' is for debugging purpose.
+ Debug = 0x80,
+ /// Register reads a value that is defined inside the same instruction or
+ /// bundle.
+ InternalRead = 0x100,
+ /// Register that may be renamed.
+ Renamable = 0x200,
+ DefineNoRead = Define | Undef,
+ ImplicitDefine = Implicit | Define,
+ ImplicitKill = Implicit | Kill
+};
} // end namespace RegState
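A minimal call-site sketch of how these flags compose when building an instruction (editorial; MBB, I, DL, TII, DstReg and SrcReg are assumed to come from the surrounding pass):

static void emitKillingCopy(llvm::MachineBasicBlock &MBB,
                            llvm::MachineBasicBlock::iterator I,
                            const llvm::DebugLoc &DL,
                            const llvm::TargetInstrInfo &TII,
                            llvm::Register DstReg, llvm::Register SrcReg) {
  // DstReg is written; SrcReg's live range ends at this copy.
  llvm::BuildMI(MBB, I, DL, TII.get(llvm::TargetOpcode::COPY))
      .addReg(DstReg, llvm::RegState::Define)
      .addReg(SrcReg, llvm::RegState::Kill);
}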
@@ -85,7 +95,7 @@
Register getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); }
/// Add a new virtual register operand.
- const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
+ const MachineInstrBuilder &addReg(Register RegNo, unsigned flags = 0,
unsigned SubReg = 0) const {
assert((flags & 0x1) == 0 &&
"Passing in 'true' to addReg is forbidden! Use enums instead.");
@@ -104,14 +114,14 @@
}
/// Add a virtual register definition operand.
- const MachineInstrBuilder &addDef(unsigned RegNo, unsigned Flags = 0,
+ const MachineInstrBuilder &addDef(Register RegNo, unsigned Flags = 0,
unsigned SubReg = 0) const {
return addReg(RegNo, Flags | RegState::Define, SubReg);
}
/// Add a virtual register use operand. It is an error for Flags to contain
/// `RegState::Define` when calling this function.
- const MachineInstrBuilder &addUse(unsigned RegNo, unsigned Flags = 0,
+ const MachineInstrBuilder &addUse(Register RegNo, unsigned Flags = 0,
unsigned SubReg = 0) const {
assert(!(Flags & RegState::Define) &&
"Misleading addUse defines register, use addReg instead.");
@@ -135,7 +145,7 @@
}
const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateMBB(MBB, TargetFlags));
return *this;
}
@@ -145,42 +155,42 @@
return *this;
}
- const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx,
- int Offset = 0,
- unsigned char TargetFlags = 0) const {
+ const MachineInstrBuilder &
+ addConstantPoolIndex(unsigned Idx, int Offset = 0,
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateCPI(Idx, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateTargetIndex(Idx, Offset,
TargetFlags));
return *this;
}
const MachineInstrBuilder &addJumpTableIndex(unsigned Idx,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateJTI(Idx, TargetFlags));
return *this;
}
const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV,
int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateGA(GV, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addExternalSymbol(const char *FnName,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateES(FnName, TargetFlags));
return *this;
}
const MachineInstrBuilder &addBlockAddress(const BlockAddress *BA,
int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateBA(BA, Offset, TargetFlags));
return *this;
}
@@ -250,6 +260,11 @@
return *this;
}
+ const MachineInstrBuilder &addShuffleMask(ArrayRef<int> Val) const {
+ MI->addOperand(*MF, MachineOperand::CreateShuffleMask(Val));
+ return *this;
+ }
+
const MachineInstrBuilder &addSym(MCSymbol *Sym,
unsigned char TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym, TargetFlags));
@@ -290,6 +305,9 @@
case MachineOperand::MO_BlockAddress:
return addBlockAddress(Disp.getBlockAddress(), Disp.getOffset() + off,
TargetFlags);
+ case MachineOperand::MO_JumpTableIndex:
+ assert(off == 0 && "cannot create offset into jump tables");
+ return addJumpTableIndex(Disp.getIndex(), TargetFlags);
}
}
@@ -316,7 +334,7 @@
/// This version of the builder sets up the first operand as a
/// destination virtual register.
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL,
- const MCInstrDesc &MCID, unsigned DestReg) {
+ const MCInstrDesc &MCID, Register DestReg) {
return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL))
.addReg(DestReg, RegState::Define);
}
@@ -327,7 +345,7 @@
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
@@ -343,7 +361,7 @@
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::instr_iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
@@ -352,7 +370,7 @@
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
// Calling the overload for instr_iterator is always correct. However, the
// definition is not available in headers, so inline the check.
if (I.isInsideBundle())
@@ -362,7 +380,7 @@
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
return BuildMI(BB, *I, DL, MCID, DestReg);
}
@@ -416,7 +434,7 @@
/// end of the given MachineBasicBlock, and sets up the first operand as a
/// destination virtual register.
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL,
- const MCInstrDesc &MCID, unsigned DestReg) {
+ const MCInstrDesc &MCID, Register DestReg) {
return BuildMI(*BB, BB->end(), DL, MCID, DestReg);
}
@@ -426,7 +444,7 @@
/// second operand is an immediate.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- unsigned Reg, const MDNode *Variable,
+ Register Reg, const MDNode *Variable,
const MDNode *Expr);
/// This version of the builder builds a DBG_VALUE intrinsic
@@ -442,7 +460,7 @@
MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- unsigned Reg, const MDNode *Variable,
+ Register Reg, const MDNode *Variable,
const MDNode *Expr);
/// This version of the builder builds a DBG_VALUE intrinsic
@@ -490,16 +508,13 @@
/// Get all register state flags from machine operand \p RegOp.
inline unsigned getRegState(const MachineOperand &RegOp) {
assert(RegOp.isReg() && "Not a register operand");
- return getDefRegState(RegOp.isDef()) |
- getImplRegState(RegOp.isImplicit()) |
- getKillRegState(RegOp.isKill()) |
- getDeadRegState(RegOp.isDead()) |
- getUndefRegState(RegOp.isUndef()) |
- getInternalReadRegState(RegOp.isInternalRead()) |
- getDebugRegState(RegOp.isDebug()) |
- getRenamableRegState(
- TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) &&
- RegOp.isRenamable());
+ return getDefRegState(RegOp.isDef()) | getImplRegState(RegOp.isImplicit()) |
+ getKillRegState(RegOp.isKill()) | getDeadRegState(RegOp.isDead()) |
+ getUndefRegState(RegOp.isUndef()) |
+ getInternalReadRegState(RegOp.isInternalRead()) |
+ getDebugRegState(RegOp.isDebug()) |
+ getRenamableRegState(Register::isPhysicalRegister(RegOp.getReg()) &&
+ RegOp.isRenamable());
}
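An editorial one-liner showing the typical consumer of getRegState(): cloning an operand's full state onto a rebuilt instruction:

static void cloneRegOperand(const llvm::MachineInstrBuilder &MIB,
                            const llvm::MachineOperand &MO) {
  MIB.addReg(MO.getReg(), llvm::getRegState(MO), MO.getSubReg());
}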
/// Helper class for constructing bundles of MachineInstrs.
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundle.h b/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundle.h
index 1810d23..8a73f9a 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundle.h
@@ -75,12 +75,12 @@
}
//===----------------------------------------------------------------------===//
-// MachineOperand iterator
+// MachineBundleOperand iterator
//
-/// MachineOperandIteratorBase - Iterator that can visit all operands on a
-/// MachineInstr, or all operands on a bundle of MachineInstrs. This class is
-/// not intended to be used directly, use one of the sub-classes instead.
+/// MIBundleOperandIteratorBase - Iterator that visits all operands in a bundle
+/// of MachineInstrs. This class is not intended to be used directly, use one
+/// of the sub-classes instead.
///
/// Intended use:
///
@@ -90,7 +90,10 @@
/// ...
/// }
///
-class MachineOperandIteratorBase {
+template <typename ValueT>
+class MIBundleOperandIteratorBase
+ : public iterator_facade_base<MIBundleOperandIteratorBase<ValueT>,
+ std::forward_iterator_tag, ValueT> {
MachineBasicBlock::instr_iterator InstrI, InstrE;
MachineInstr::mop_iterator OpI, OpE;
@@ -99,35 +102,34 @@
void advance() {
while (OpI == OpE) {
// Don't advance off the basic block, or into a new bundle.
- if (++InstrI == InstrE || !InstrI->isInsideBundle())
+ if (++InstrI == InstrE || !InstrI->isInsideBundle()) {
+ InstrI = InstrE;
break;
+ }
OpI = InstrI->operands_begin();
OpE = InstrI->operands_end();
}
}
protected:
- /// MachineOperandIteratorBase - Create an iterator that visits all operands
+ /// MIBundleOperandIteratorBase - Create an iterator that visits all operands
/// on MI, or all operands on every instruction in the bundle containing MI.
///
/// @param MI The instruction to examine.
- /// @param WholeBundle When true, visit all operands on the entire bundle.
///
- explicit MachineOperandIteratorBase(MachineInstr &MI, bool WholeBundle) {
- if (WholeBundle) {
- InstrI = getBundleStart(MI.getIterator());
- InstrE = MI.getParent()->instr_end();
- } else {
- InstrI = InstrE = MI.getIterator();
- ++InstrE;
- }
+ explicit MIBundleOperandIteratorBase(MachineInstr &MI) {
+ InstrI = getBundleStart(MI.getIterator());
+ InstrE = MI.getParent()->instr_end();
OpI = InstrI->operands_begin();
OpE = InstrI->operands_end();
- if (WholeBundle)
- advance();
+ advance();
}
- MachineOperand &deref() const { return *OpI; }
+ /// Constructor for an iterator past the last iteration: both instruction
+ /// iterators point to the end of the BB and OpI == OpE.
+ explicit MIBundleOperandIteratorBase(MachineBasicBlock::instr_iterator InstrE,
+ MachineInstr::mop_iterator OpE)
+ : InstrI(InstrE), InstrE(InstrE), OpI(OpE), OpE(OpE) {}
public:
/// isValid - Returns true until all the operands have been visited.
@@ -140,123 +142,148 @@
advance();
}
+ ValueT &operator*() const { return *OpI; }
+ ValueT *operator->() const { return &*OpI; }
+
+ bool operator==(const MIBundleOperandIteratorBase &Arg) const {
+ // Iterators are equal if InstrI matches and either the OpIs match, or
+ // OpI == OpE for both. The second condition lets us construct an 'end'
+ // iterator without finding the last instruction in a bundle up-front.
+ return InstrI == Arg.InstrI &&
+ (OpI == Arg.OpI || (OpI == OpE && Arg.OpI == Arg.OpE));
+ }
/// getOperandNo - Returns the number of the current operand relative to its
/// instruction.
///
unsigned getOperandNo() const {
return OpI - InstrI->operands_begin();
}
-
- /// VirtRegInfo - Information about a virtual register used by a set of operands.
- ///
- struct VirtRegInfo {
- /// Reads - One of the operands read the virtual register. This does not
- /// include undef or internal use operands, see MO::readsReg().
- bool Reads;
-
- /// Writes - One of the operands writes the virtual register.
- bool Writes;
-
- /// Tied - Uses and defs must use the same register. This can be because of
- /// a two-address constraint, or there may be a partial redefinition of a
- /// sub-register.
- bool Tied;
- };
-
- /// Information about how a physical register Reg is used by a set of
- /// operands.
- struct PhysRegInfo {
- /// There is a regmask operand indicating Reg is clobbered.
- /// \see MachineOperand::CreateRegMask().
- bool Clobbered;
-
- /// Reg or one of its aliases is defined. The definition may only cover
- /// parts of the register.
- bool Defined;
- /// Reg or a super-register is defined. The definition covers the full
- /// register.
- bool FullyDefined;
-
- /// Reg or one of its aliases is read. The register may only be read
- /// partially.
- bool Read;
- /// Reg or a super-register is read. The full register is read.
- bool FullyRead;
-
- /// Either:
- /// - Reg is FullyDefined and all defs of reg or an overlapping
- /// register are dead, or
- /// - Reg is completely dead because "defined" by a clobber.
- bool DeadDef;
-
- /// Reg is Defined and all defs of reg or an overlapping register are
- /// dead.
- bool PartialDeadDef;
-
- /// There is a use operand of reg or a super-register with kill flag set.
- bool Killed;
- };
-
- /// analyzeVirtReg - Analyze how the current instruction or bundle uses a
- /// virtual register. This function should not be called after operator++(),
- /// it expects a fresh iterator.
- ///
- /// @param Reg The virtual register to analyze.
- /// @param Ops When set, this vector will receive an (MI, OpNum) entry for
- /// each operand referring to Reg.
- /// @returns A filled-in RegInfo struct.
- VirtRegInfo analyzeVirtReg(unsigned Reg,
- SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops = nullptr);
-
- /// analyzePhysReg - Analyze how the current instruction or bundle uses a
- /// physical register. This function should not be called after operator++(),
- /// it expects a fresh iterator.
- ///
- /// @param Reg The physical register to analyze.
- /// @returns A filled-in PhysRegInfo struct.
- PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI);
-};
-
-/// MIOperands - Iterate over operands of a single instruction.
-///
-class MIOperands : public MachineOperandIteratorBase {
-public:
- MIOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, false) {}
- MachineOperand &operator* () const { return deref(); }
- MachineOperand *operator->() const { return &deref(); }
-};
-
-/// ConstMIOperands - Iterate over operands of a single const instruction.
-///
-class ConstMIOperands : public MachineOperandIteratorBase {
-public:
- ConstMIOperands(const MachineInstr &MI)
- : MachineOperandIteratorBase(const_cast<MachineInstr &>(MI), false) {}
- const MachineOperand &operator* () const { return deref(); }
- const MachineOperand *operator->() const { return &deref(); }
};
/// MIBundleOperands - Iterate over all operands in a bundle of machine
/// instructions.
///
-class MIBundleOperands : public MachineOperandIteratorBase {
+class MIBundleOperands : public MIBundleOperandIteratorBase<MachineOperand> {
+ /// Constructor for an iterator past the last iteration.
+ MIBundleOperands(MachineBasicBlock::instr_iterator InstrE,
+ MachineInstr::mop_iterator OpE)
+ : MIBundleOperandIteratorBase(InstrE, OpE) {}
+
public:
- MIBundleOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, true) {}
- MachineOperand &operator* () const { return deref(); }
- MachineOperand *operator->() const { return &deref(); }
+ MIBundleOperands(MachineInstr &MI) : MIBundleOperandIteratorBase(MI) {}
+
+ /// Returns an iterator past the last iteration.
+ static MIBundleOperands end(const MachineBasicBlock &MBB) {
+ return {const_cast<MachineBasicBlock &>(MBB).instr_end(),
+ const_cast<MachineBasicBlock &>(MBB).instr_begin()->operands_end()};
+ }
};
/// ConstMIBundleOperands - Iterate over all operands in a const bundle of
/// machine instructions.
///
-class ConstMIBundleOperands : public MachineOperandIteratorBase {
+class ConstMIBundleOperands
+ : public MIBundleOperandIteratorBase<const MachineOperand> {
+
+ /// Constructor for an iterator past the last iteration.
+ ConstMIBundleOperands(MachineBasicBlock::instr_iterator InstrE,
+ MachineInstr::mop_iterator OpE)
+ : MIBundleOperandIteratorBase(InstrE, OpE) {}
+
public:
ConstMIBundleOperands(const MachineInstr &MI)
- : MachineOperandIteratorBase(const_cast<MachineInstr &>(MI), true) {}
- const MachineOperand &operator* () const { return deref(); }
- const MachineOperand *operator->() const { return &deref(); }
+ : MIBundleOperandIteratorBase(const_cast<MachineInstr &>(MI)) {}
+
+ /// Returns an iterator past the last iteration.
+ static ConstMIBundleOperands end(const MachineBasicBlock &MBB) {
+ return {const_cast<MachineBasicBlock &>(MBB).instr_end(),
+ const_cast<MachineBasicBlock &>(MBB).instr_begin()->operands_end()};
+ }
};
+inline iterator_range<ConstMIBundleOperands>
+const_mi_bundle_ops(const MachineInstr &MI) {
+ return make_range(ConstMIBundleOperands(MI),
+ ConstMIBundleOperands::end(*MI.getParent()));
+}
+
+inline iterator_range<MIBundleOperands> mi_bundle_ops(MachineInstr &MI) {
+ return make_range(MIBundleOperands(MI),
+ MIBundleOperands::end(*MI.getParent()));
+}
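With the ranges above, bundle walks become range-for loops in place of the removed isValid()/operator++ style. A minimal editorial sketch:

// Counts register defs across every instruction in MI's bundle.
static unsigned countBundleDefs(const llvm::MachineInstr &MI) {
  unsigned Defs = 0;
  for (const llvm::MachineOperand &MO : llvm::const_mi_bundle_ops(MI))
    if (MO.isReg() && MO.isDef())
      ++Defs;
  return Defs;
}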
+
+/// VirtRegInfo - Information about a virtual register used by a set of
+/// operands.
+///
+struct VirtRegInfo {
+ /// Reads - One of the operands reads the virtual register. This does not
+ /// include undef or internal use operands, see MO::readsReg().
+ bool Reads;
+
+ /// Writes - One of the operands writes the virtual register.
+ bool Writes;
+
+ /// Tied - Uses and defs must use the same register. This can be because of
+ /// a two-address constraint, or there may be a partial redefinition of a
+ /// sub-register.
+ bool Tied;
+};
+
+/// AnalyzeVirtRegInBundle - Analyze how the given instruction or bundle uses
+/// a virtual register.
+///
+/// @param Reg The virtual register to analyze.
+/// @param Ops When set, this vector will receive an (MI, OpNum) entry for
+/// each operand referring to Reg.
+/// @returns A filled-in VirtRegInfo struct.
+VirtRegInfo AnalyzeVirtRegInBundle(
+ MachineInstr &MI, Register Reg,
+ SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops = nullptr);
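A sketch of the call-site shape after the move from iterator method to free function (editorial):

static bool isPureUseInBundle(llvm::MachineInstr &MI, llvm::Register Reg) {
  llvm::VirtRegInfo VRI = llvm::AnalyzeVirtRegInBundle(MI, Reg);
  return VRI.Reads && !VRI.Writes && !VRI.Tied;
}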
+
+/// Information about how a physical register Reg is used by a set of
+/// operands.
+struct PhysRegInfo {
+ /// There is a regmask operand indicating Reg is clobbered.
+ /// \see MachineOperand::CreateRegMask().
+ bool Clobbered;
+
+ /// Reg or one of its aliases is defined. The definition may only cover
+ /// parts of the register.
+ bool Defined;
+ /// Reg or a super-register is defined. The definition covers the full
+ /// register.
+ bool FullyDefined;
+
+ /// Reg or one of its aliases is read. The register may only be read
+ /// partially.
+ bool Read;
+ /// Reg or a super-register is read. The full register is read.
+ bool FullyRead;
+
+ /// Either:
+ /// - Reg is FullyDefined and all defs of reg or an overlapping
+ /// register are dead, or
+ /// - Reg is completely dead because "defined" by a clobber.
+ bool DeadDef;
+
+ /// Reg is Defined and all defs of reg or an overlapping register are
+ /// dead.
+ bool PartialDeadDef;
+
+ /// There is a use operand of reg or a super-register with kill flag set.
+ bool Killed;
+};
+
+/// AnalyzePhysRegInBundle - Analyze how the given instruction or bundle uses
+/// a physical register.
+///
+/// @param Reg The physical register to analyze.
+/// @returns A filled-in PhysRegInfo struct.
+PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
+ const TargetRegisterInfo *TRI);
+
} // End llvm namespace
#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundleIterator.h b/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundleIterator.h
index 0f59563..250cb0d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundleIterator.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineInstrBundleIterator.h
@@ -152,8 +152,8 @@
template <class OtherTy>
MachineInstrBundleIterator(
const MachineInstrBundleIterator<OtherTy, IsReverse> &I,
- typename std::enable_if<std::is_convertible<OtherTy *, Ty *>::value,
- void *>::type = nullptr)
+ std::enable_if_t<std::is_convertible<OtherTy *, Ty *>::value, void *> =
+ nullptr)
: MII(I.getInstrIterator()) {}
MachineInstrBundleIterator() : MII(nullptr) {}
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineJumpTableInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineJumpTableInfo.h
index 1178114..1d082bd 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -106,6 +106,9 @@
JumpTables[Idx].MBBs.clear();
}
+ /// RemoveMBBFromJumpTables - If MBB is present in any jump tables, remove it.
+ bool RemoveMBBFromJumpTables(MachineBasicBlock *MBB);
+
/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
/// the jump tables to branch to New instead.
bool ReplaceMBBInJumpTables(MachineBasicBlock *Old, MachineBasicBlock *New);
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineLoopInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineLoopInfo.h
index da6df59..c7491d4 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineLoopInfo.h
@@ -37,6 +37,7 @@
namespace llvm {
+class MachineDominatorTree;
// Implementation in LoopInfoImpl.h
class MachineLoop;
extern template class LoopBase<MachineBasicBlock, MachineLoop>;
@@ -66,6 +67,12 @@
/// it returns an unknown location.
DebugLoc getStartLoc() const;
+ /// Returns true if the instruction is loop invariant.
+ /// I.e., all virtual register operands are defined outside of the loop,
+ /// physical registers aren't accessed explicitly, and there are no side
+ /// effects that aren't captured by the operands or other flags.
+ bool isLoopInvariant(MachineInstr &I) const;
+
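An editorial sketch of the new query; a hoisting heuristic no longer needs its own operand scan:

static bool isHoistCandidate(const llvm::MachineLoop &ML,
                             llvm::MachineInstr &MI) {
  return ML.isLoopInvariant(MI); // defs outside loop, no stray side effects
}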
void dump() const;
private:
@@ -88,8 +95,10 @@
public:
static char ID; // Pass identification, replacement for typeid
- MachineLoopInfo() : MachineFunctionPass(ID) {
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ MachineLoopInfo();
+ explicit MachineLoopInfo(MachineDominatorTree &MDT)
+ : MachineFunctionPass(ID) {
+ calculate(MDT);
}
MachineLoopInfo(const MachineLoopInfo &) = delete;
MachineLoopInfo &operator=(const MachineLoopInfo &) = delete;
@@ -133,6 +142,7 @@
/// Calculate the natural loop information.
bool runOnMachineFunction(MachineFunction &F) override;
+ void calculate(MachineDominatorTree &MDT);
void releaseMemory() override { LI.releaseMemory(); }
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineLoopUtils.h b/linux-x64/clang/include/llvm/CodeGen/MachineLoopUtils.h
new file mode 100644
index 0000000..ec0b352
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineLoopUtils.h
@@ -0,0 +1,42 @@
+//=- MachineLoopUtils.h - Helper functions for manipulating loops -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
+#define LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
+
+namespace llvm {
+class MachineLoop;
+class MachineBasicBlock;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+enum LoopPeelDirection {
+ LPD_Front, ///< Peel the first iteration of the loop.
+ LPD_Back ///< Peel the last iteration of the loop.
+};
+
+/// Peels a single-block loop. Loop must have two successors, one of which
+/// must be itself. Similarly it must have two predecessors, one of which must
+/// be itself.
+///
+/// The loop block is copied and inserted into the CFG such that two copies of
+/// the loop follow on from each other. The copy is inserted either before or
+/// after the loop based on Direction.
+///
+/// Phis are updated and an unconditional branch inserted at the end of the
+/// clone so as to execute a single iteration.
+///
+/// The trip count of Loop is not updated.
+MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction,
+ MachineBasicBlock *Loop,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII);
+
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
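An editorial sketch of a call site, under the contract stated above (Header is assumed to be its own sole loop predecessor and successor):

static llvm::MachineBasicBlock *
peelFirstIteration(llvm::MachineBasicBlock *Header,
                   llvm::MachineRegisterInfo &MRI,
                   const llvm::TargetInstrInfo *TII) {
  // The clone runs once before the loop body; updating any trip count
  // bookkeeping remains the caller's responsibility.
  return llvm::PeelSingleBlockLoop(llvm::LPD_Front, Header, MRI, TII);
}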
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineMemOperand.h b/linux-x64/clang/include/llvm/CodeGen/MachineMemOperand.h
index 65f7063..1befe93 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineMemOperand.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineMemOperand.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h" // PointerLikeTypeTraits<Value*>
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
@@ -58,8 +59,8 @@
AddrSpace = v ? v->getAddressSpace() : 0;
}
- explicit MachinePointerInfo(unsigned AddressSpace = 0)
- : V((const Value *)nullptr), Offset(0), StackID(0),
+ explicit MachinePointerInfo(unsigned AddressSpace = 0, int64_t offset = 0)
+ : V((const Value *)nullptr), Offset(offset), StackID(0),
AddrSpace(AddressSpace) {}
explicit MachinePointerInfo(
@@ -77,10 +78,10 @@
MachinePointerInfo getWithOffset(int64_t O) const {
if (V.isNull())
- return MachinePointerInfo(AddrSpace);
+ return MachinePointerInfo(AddrSpace, Offset + O);
if (V.is<const Value*>())
- return MachinePointerInfo(V.get<const Value*>(), Offset+O, StackID);
- return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O,
+ return MachinePointerInfo(V.get<const Value*>(), Offset + O, StackID);
+ return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset + O,
StackID);
}
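The hunk above also changes behavior for value-less pointer info: the offset now accumulates instead of being dropped. A minimal editorial sketch:

static llvm::MachinePointerInfo offsetTwice(unsigned AddrSpace) {
  llvm::MachinePointerInfo PI(AddrSpace, /*offset=*/8);
  return PI.getWithOffset(4); // Offset == 12; previously this reset to 0
}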
@@ -169,7 +170,7 @@
MachinePointerInfo PtrInfo;
uint64_t Size;
Flags FlagVals;
- uint16_t BaseAlignLog2; // log_2(base_alignment) + 1
+ Align BaseAlign;
MachineAtomicInfo AtomicInfo;
AAMDNodes AAInfo;
const MDNode *Ranges;
@@ -181,8 +182,7 @@
/// atomic operations the atomic ordering requirements when store does not
/// occur must also be specified.
MachineMemOperand(MachinePointerInfo PtrInfo, Flags flags, uint64_t s,
- uint64_t a,
- const AAMDNodes &AAInfo = AAMDNodes(),
+ Align a, const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr,
SyncScope::ID SSID = SyncScope::System,
AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
@@ -223,13 +223,21 @@
/// Return the size in bits of the memory reference.
uint64_t getSizeInBits() const { return Size * 8; }
+ LLVM_ATTRIBUTE_DEPRECATED(uint64_t getAlignment() const,
+ "Use getAlign instead");
+
/// Return the minimum known alignment in bytes of the actual memory
/// reference.
- uint64_t getAlignment() const;
+ Align getAlign() const;
+
+ LLVM_ATTRIBUTE_DEPRECATED(uint64_t getBaseAlignment() const,
+ "Use getBaseAlign instead") {
+ return BaseAlign.value();
+ }
/// Return the minimum known alignment in bytes of the base address, without
/// the offset.
- uint64_t getBaseAlignment() const { return (1u << BaseAlignLog2) >> 1; }
+ Align getBaseAlign() const { return BaseAlign; }
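A sketch of the migration the deprecation notices above ask for (editorial):

#include <cstdint>

static bool isAtLeastAligned(const llvm::MachineMemOperand &MMO,
                             uint64_t Bytes) {
  // getAlign() replaces the deprecated byte-valued getAlignment().
  return MMO.getAlign().value() >= Bytes;
}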
/// Return the AA tags for the memory reference.
AAMDNodes getAAInfo() const { return AAInfo; }
@@ -293,8 +301,6 @@
/// Support for operator<<.
/// @{
- void print(raw_ostream &OS) const;
- void print(raw_ostream &OS, ModuleSlotTracker &MST) const;
void print(raw_ostream &OS, ModuleSlotTracker &MST,
SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context,
const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const;
@@ -309,7 +315,7 @@
LHS.getFlags() == RHS.getFlags() &&
LHS.getAAInfo() == RHS.getAAInfo() &&
LHS.getRanges() == RHS.getRanges() &&
- LHS.getAlignment() == RHS.getAlignment() &&
+ LHS.getAlign() == RHS.getAlign() &&
LHS.getAddrSpace() == RHS.getAddrSpace();
}
@@ -319,11 +325,6 @@
}
};
-inline raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO) {
- MRO.print(OS);
- return OS;
-}
-
} // End llvm namespace
#endif
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineModuleInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineModuleInfo.h
index 4ff5c7f..fa900af 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineModuleInfo.h
@@ -33,6 +33,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
@@ -53,8 +54,8 @@
//===----------------------------------------------------------------------===//
/// This class can be derived from and used by targets to hold private
/// target-specific information for each Module. Objects of this type are
-/// accessed/created with MMI::getInfo and destroyed when the MachineModuleInfo
-/// is destroyed.
+/// accessed/created with MachineModuleInfo::getObjFileInfo and destroyed when
+/// the MachineModuleInfo is destroyed.
///
class MachineModuleInfoImpl {
public:
@@ -74,11 +75,17 @@
/// made by different debugging and exception handling schemes and reformatted
/// for specific use.
///
-class MachineModuleInfo : public ImmutablePass {
+class MachineModuleInfo {
+ friend class MachineModuleInfoWrapperPass;
+ friend class MachineModuleAnalysis;
+
const LLVMTargetMachine &TM;
/// This is the MCContext used for the entire code generator.
MCContext Context;
+ // This is an external context that, if assigned, will be used instead of
+ // the internal context.
+ MCContext *ExternalContext = nullptr;
/// This is the LLVM Module being worked on.
const Module *TheModule;
@@ -140,28 +147,37 @@
const Function *LastRequest = nullptr; ///< Used for shortcut/cache.
MachineFunction *LastResult = nullptr; ///< Used for shortcut/cache.
+ MachineModuleInfo &operator=(MachineModuleInfo &&MMII) = delete;
+
public:
- static char ID; // Pass identification, replacement for typeid
-
explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr);
- ~MachineModuleInfo() override;
- // Initialization and Finalization
- bool doInitialization(Module &) override;
- bool doFinalization(Module &) override;
+ explicit MachineModuleInfo(const LLVMTargetMachine *TM,
+ MCContext *ExtContext);
+
+ MachineModuleInfo(MachineModuleInfo &&MMII);
+
+ ~MachineModuleInfo();
+
+ void initialize();
+ void finalize();
const LLVMTargetMachine &getTarget() const { return TM; }
- const MCContext &getContext() const { return Context; }
- MCContext &getContext() { return Context; }
+ const MCContext &getContext() const {
+ return ExternalContext ? *ExternalContext : Context;
+ }
+ MCContext &getContext() {
+ return ExternalContext ? *ExternalContext : Context;
+ }
const Module *getModule() const { return TheModule; }
/// Returns the MachineFunction constructed for the IR function \p F.
/// Creates a new MachineFunction if none exists yet.
- MachineFunction &getOrCreateMachineFunction(const Function &F);
+ MachineFunction &getOrCreateMachineFunction(Function &F);
- /// \bried Returns the MachineFunction associated to IR function \p F if there
+ /// \brief Returns the MachineFunction associated to IR function \p F if there
/// is one, otherwise nullptr.
MachineFunction *getMachineFunction(const Function &F) const;
@@ -227,13 +243,6 @@
/// to emit them as well, return the whole set.
ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
- /// If the specified function has had any references to address-taken blocks
- /// generated, but the block got deleted, return the symbol now so we can
- /// emit it. This prevents emitting a reference to a symbol that has no
- /// definition.
- void takeDeletedSymbolsForFunction(const Function *F,
- std::vector<MCSymbol*> &Result);
-
/// \name Exception Handling
/// \{
@@ -252,8 +261,49 @@
return Personalities;
}
/// \}
+
+ // MMI owns the MCContext. It should never be invalidated.
+ bool invalidate(Module &, const PreservedAnalyses &,
+ ModuleAnalysisManager::Invalidator &) {
+ return false;
+ }
}; // End class MachineModuleInfo
+class MachineModuleInfoWrapperPass : public ImmutablePass {
+ MachineModuleInfo MMI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM = nullptr);
+
+ explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM,
+ MCContext *ExtContext);
+
+ // Initialization and Finalization
+ bool doInitialization(Module &) override;
+ bool doFinalization(Module &) override;
+
+ MachineModuleInfo &getMMI() { return MMI; }
+ const MachineModuleInfo &getMMI() const { return MMI; }
+};
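An editorial skeleton of how a legacy pass reaches MMI now that MachineModuleInfo is no longer itself a pass (MyMachinePass is hypothetical; pass registration boilerplate is omitted):

#include "llvm/CodeGen/MachineFunctionPass.h"

struct MyMachinePass : llvm::MachineFunctionPass {
  static char ID;
  MyMachinePass() : MachineFunctionPass(ID) {}
  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::MachineModuleInfoWrapperPass>();
    llvm::MachineFunctionPass::getAnalysisUsage(AU);
  }
  bool runOnMachineFunction(llvm::MachineFunction &MF) override {
    llvm::MachineModuleInfo &MMI =
        getAnalysis<llvm::MachineModuleInfoWrapperPass>().getMMI();
    (void)MMI; // query MMI.getContext(), getMachineFunction(), etc. here
    return false;
  }
};
char MyMachinePass::ID = 0;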
+
+/// An analysis that produces \c MachineModuleInfo for a module.
+class MachineModuleAnalysis : public AnalysisInfoMixin<MachineModuleAnalysis> {
+ friend AnalysisInfoMixin<MachineModuleAnalysis>;
+ static AnalysisKey Key;
+
+ const LLVMTargetMachine *TM;
+
+public:
+ /// Provide the result type for this analysis pass.
+ using Result = MachineModuleInfo;
+
+ MachineModuleAnalysis(const LLVMTargetMachine *TM) : TM(TM) {}
+
+ /// Run the analysis pass and produce machine module information.
+ MachineModuleInfo run(Module &M, ModuleAnalysisManager &);
+};
+
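A sketch of the new-pass-manager side (editorial; TM is assumed non-null and MAM to be an otherwise configured ModuleAnalysisManager):

static llvm::MachineModuleInfo &
getMMIViaNewPM(llvm::Module &M, llvm::ModuleAnalysisManager &MAM,
               const llvm::LLVMTargetMachine *TM) {
  MAM.registerPass([&] { return llvm::MachineModuleAnalysis(TM); });
  return MAM.getResult<llvm::MachineModuleAnalysis>(M);
}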
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEMODULEINFO_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineOperand.h b/linux-x64/clang/include/llvm/CodeGen/MachineOperand.h
index 2152c75..b7e89cf 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineOperand.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineOperand.h
@@ -23,6 +23,7 @@
namespace llvm {
class BlockAddress;
+class Constant;
class ConstantFP;
class ConstantInt;
class GlobalValue;
@@ -68,7 +69,8 @@
MO_CFIIndex, ///< MCCFIInstruction index.
MO_IntrinsicID, ///< Intrinsic ID for ISel
MO_Predicate, ///< Generic predicate for ISel
- MO_Last = MO_Predicate,
+ MO_ShuffleMask, ///< Other IR Constant for ISel (shuffle masks)
+ MO_Last = MO_ShuffleMask
};
private:
@@ -161,7 +163,8 @@
MachineInstr *ParentMI;
/// Contents union - This contains the payload for the various operand types.
- union {
+ union ContentsUnion {
+ ContentsUnion() {}
MachineBasicBlock *MBB; // For MO_MachineBasicBlock.
const ConstantFP *CFP; // For MO_FPImmediate.
const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit.
@@ -172,6 +175,7 @@
unsigned CFIIndex; // For MO_CFI.
Intrinsic::ID IntrinsicID; // For MO_IntrinsicID.
unsigned Pred; // For MO_Predicate
+ ArrayRef<int> ShuffleMask; // For MO_ShuffleMask
struct { // For MO_Register.
// Register number is in SmallContents.RegNo.
@@ -275,6 +279,9 @@
/// More complex way of printing a MachineOperand.
/// \param TypeToPrint specifies the generic type to be printed on uses and
/// defs. It can be determined using MachineInstr::getTypeToPrint.
+ /// \param OpIdx - specifies the index of the operand in the machine
+ /// instruction. This will be used by the target-dependent MIR formatter.
+ /// Could be None if the index is unknown, e.g. when called by dump().
/// \param PrintDef - whether we want to print `def` on an operand which
/// isDef. Sometimes, if the operand is printed before '=', we don't print
/// `def`.
@@ -291,8 +298,9 @@
/// information from its parent.
/// \param IntrinsicInfo - same as \p TRI.
void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint,
- bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies,
- unsigned TiedOperandIdx, const TargetRegisterInfo *TRI,
+ Optional<unsigned> OpIdx, bool PrintDef, bool IsStandalone,
+ bool ShouldPrintRegisterTies, unsigned TiedOperandIdx,
+ const TargetRegisterInfo *TRI,
const TargetIntrinsicInfo *IntrinsicInfo) const;
/// Same as print(os, TRI, IntrinsicInfo), but allows to specify the low-level
@@ -341,6 +349,7 @@
bool isCFIIndex() const { return OpKind == MO_CFIIndex; }
bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; }
bool isPredicate() const { return OpKind == MO_Predicate; }
+ bool isShuffleMask() const { return OpKind == MO_ShuffleMask; }
//===--------------------------------------------------------------------===//
// Accessors for Register Operands
//===--------------------------------------------------------------------===//
@@ -455,7 +464,7 @@
/// Change the register this operand corresponds to.
///
- void setReg(unsigned Reg);
+ void setReg(Register Reg);
void setSubReg(unsigned subReg) {
assert(isReg() && "Wrong MachineOperand mutator");
@@ -468,13 +477,13 @@
/// using TargetRegisterInfo to compose the subreg indices if necessary.
/// Reg must be a virtual register, SubIdx can be 0.
///
- void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo&);
+ void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo&);
/// substPhysReg - Substitute the current register with the physical register
/// Reg, taking any existing SubReg into account. For instance,
/// substPhysReg(%eax) will change %reg1024:sub_8bit to %al.
///
- void substPhysReg(unsigned Reg, const TargetRegisterInfo&);
+ void substPhysReg(MCRegister Reg, const TargetRegisterInfo&);
void setIsUse(bool Val = true) { setIsDef(!Val); }
@@ -579,6 +588,11 @@
return Contents.Pred;
}
+ ArrayRef<int> getShuffleMask() const {
+ assert(isShuffleMask() && "Wrong MachineOperand accessor");
+ return Contents.ShuffleMask;
+ }
+
/// Return the offset from the symbol in this operand. This always returns 0
/// for ExternalSymbol operands.
int64_t getOffset() const {
@@ -598,14 +612,14 @@
/// It is sometimes necessary to detach the register mask pointer from its
/// machine operand. This static method can be used for such detached bit
/// mask pointers.
- static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
+ static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg) {
// See TargetRegisterInfo.h.
assert(PhysReg < (1u << 30) && "Not a physical register");
return !(RegMask[PhysReg / 32] & (1u << PhysReg % 32));
}
/// clobbersPhysReg - Returns true if this RegMask operand clobbers PhysReg.
- bool clobbersPhysReg(unsigned PhysReg) const {
+ bool clobbersPhysReg(MCRegister PhysReg) const {
return clobbersPhysReg(getRegMask(), PhysReg);
}
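A minimal editorial sketch of the common query against a call's regmask operand:

#include <cassert>

static bool callClobbers(const llvm::MachineOperand &MaskMO,
                         llvm::MCRegister PhysReg) {
  assert(MaskMO.isRegMask() && "expected a regmask operand");
  return MaskMO.clobbersPhysReg(PhysReg);
}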
@@ -684,6 +698,11 @@
Contents.RegMask = RegMaskPtr;
}
+ void setIntrinsicID(Intrinsic::ID IID) {
+ assert(isIntrinsicID() && "Wrong MachineOperand mutator");
+ Contents.IntrinsicID = IID;
+ }
+
void setPredicate(unsigned Predicate) {
assert(isPredicate() && "Wrong MachineOperand mutator");
Contents.Pred = Predicate;
@@ -709,37 +728,42 @@
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
- void ChangeToImmediate(int64_t ImmVal);
+ void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags = 0);
/// ChangeToFPImmediate - Replace this operand with a new FP immediate operand
/// of the specified value. If an operand is known to be an FP immediate
/// already, the setFPImm method should be used.
- void ChangeToFPImmediate(const ConstantFP *FPImm);
+ void ChangeToFPImmediate(const ConstantFP *FPImm, unsigned TargetFlags = 0);
/// ChangeToES - Replace this operand with a new external symbol operand.
- void ChangeToES(const char *SymName, unsigned char TargetFlags = 0);
+ void ChangeToES(const char *SymName, unsigned TargetFlags = 0);
/// ChangeToGA - Replace this operand with a new global address operand.
void ChangeToGA(const GlobalValue *GV, int64_t Offset,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
/// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
- void ChangeToMCSymbol(MCSymbol *Sym);
+ void ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags = 0);
/// Replace this operand with a frame index.
- void ChangeToFrameIndex(int Idx);
+ void ChangeToFrameIndex(int Idx, unsigned TargetFlags = 0);
/// Replace this operand with a target index.
void ChangeToTargetIndex(unsigned Idx, int64_t Offset,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be a register already,
/// the setReg method should be used.
- void ChangeToRegister(unsigned Reg, bool isDef, bool isImp = false,
+ void ChangeToRegister(Register Reg, bool isDef, bool isImp = false,
bool isKill = false, bool isDead = false,
bool isUndef = false, bool isDebug = false);
+ /// getTargetIndexName - If this MachineOperand is a TargetIndex that has a
+ /// name, attempt to get the name. Returns nullptr if the TargetIndex does not
+ /// have a name. Asserts if MO is not a TargetIndex.
+ const char *getTargetIndexName() const;
+
//===--------------------------------------------------------------------===//
// Construction methods.
//===--------------------------------------------------------------------===//
@@ -762,7 +786,7 @@
return Op;
}
- static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp = false,
+ static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp = false,
bool isKill = false, bool isDead = false,
bool isUndef = false,
bool isEarlyClobber = false,
@@ -788,7 +812,7 @@
return Op;
}
static MachineOperand CreateMBB(MachineBasicBlock *MBB,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_MachineBasicBlock);
Op.setMBB(MBB);
Op.setTargetFlags(TargetFlags);
@@ -800,7 +824,7 @@
return Op;
}
static MachineOperand CreateCPI(unsigned Idx, int Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_ConstantPoolIndex);
Op.setIndex(Idx);
Op.setOffset(Offset);
@@ -808,21 +832,21 @@
return Op;
}
static MachineOperand CreateTargetIndex(unsigned Idx, int64_t Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_TargetIndex);
Op.setIndex(Idx);
Op.setOffset(Offset);
Op.setTargetFlags(TargetFlags);
return Op;
}
- static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) {
+ static MachineOperand CreateJTI(unsigned Idx, unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_JumpTableIndex);
Op.setIndex(Idx);
Op.setTargetFlags(TargetFlags);
return Op;
}
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_GlobalAddress);
Op.Contents.OffsetedInfo.Val.GV = GV;
Op.setOffset(Offset);
@@ -830,7 +854,7 @@
return Op;
}
static MachineOperand CreateES(const char *SymName,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_ExternalSymbol);
Op.Contents.OffsetedInfo.Val.SymbolName = SymName;
Op.setOffset(0); // Offset is always 0.
@@ -838,7 +862,7 @@
return Op;
}
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_BlockAddress);
Op.Contents.OffsetedInfo.Val.BA = BA;
Op.setOffset(Offset);
@@ -876,7 +900,7 @@
}
static MachineOperand CreateMCSymbol(MCSymbol *Sym,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_MCSymbol);
Op.Contents.Sym = Sym;
Op.setOffset(0);
@@ -902,6 +926,12 @@
return Op;
}
+ static MachineOperand CreateShuffleMask(ArrayRef<int> Mask) {
+ MachineOperand Op(MachineOperand::MO_ShuffleMask);
+ Op.Contents.ShuffleMask = Mask;
+ return Op;
+ }
+
friend class MachineInstr;
friend class MachineRegisterInfo;
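
A short sketch of the widened TargetFlags parameter and the new shuffle-mask operand; `GV` is an assumed GlobalValue, and the mask values are illustrative only:

// TargetFlags is now a full unsigned, so targets are no longer limited to
// flag encodings that fit in an unsigned char.
MachineOperand GAOp =
    MachineOperand::CreateGA(GV, /*Offset=*/0, /*TargetFlags=*/0);

// CreateShuffleMask stores the ArrayRef itself, so the underlying array
// must outlive the operand.
static const int Mask[4] = {0, 2, 1, 3};
MachineOperand MaskOp = MachineOperand::CreateShuffleMask(Mask);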
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/linux-x64/clang/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index a461a29..8cc5909 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -159,7 +159,7 @@
/// that non-trivial false positives can be quickly detected by the user.
bool allowExtraAnalysis(StringRef PassName) const {
return (
- MF.getFunction().getContext().getRemarkStreamer() ||
+ MF.getFunction().getContext().getLLVMRemarkStreamer() ||
MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(
PassName));
}
@@ -172,7 +172,7 @@
// remarks enabled. We can't currently check whether remarks are requested
// for the calling pass since that requires actually building the remark.
- if (MF.getFunction().getContext().getRemarkStreamer() ||
+ if (MF.getFunction().getContext().getLLVMRemarkStreamer() ||
MF.getFunction()
.getContext()
.getDiagHandlerPtr()
@@ -182,6 +182,10 @@
}
}
+ MachineBlockFrequencyInfo *getBFI() {
+ return MBFI;
+ }
+
private:
MachineFunction &MF;
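
A hedged sketch of the usual guard around expensive remark computation; the pass name, `MI`, and the remark body are placeholders:

// Skip the costly analysis unless some consumer will actually see it.
if (ORE->allowExtraAnalysis("my-pass")) {
  MachineOptimizationRemarkAnalysis R("my-pass", "ExtraDetail",
                                      MI.getDebugLoc(), MI.getParent());
  R << "detail computed only when remarks are requested";
  ORE->emit(R);
}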
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineOutliner.h b/linux-x64/clang/include/llvm/CodeGen/MachineOutliner.h
index 377df4e..a5dbbdb 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineOutliner.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineOutliner.h
@@ -15,10 +15,11 @@
#ifndef LLVM_MACHINEOUTLINER_H
#define LLVM_MACHINEOUTLINER_H
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
namespace llvm {
namespace outliner {
@@ -37,10 +38,10 @@
struct Candidate {
private:
/// The start index of this \p Candidate in the instruction list.
- unsigned StartIdx;
+ unsigned StartIdx = 0;
/// The number of instructions in this \p Candidate.
- unsigned Len;
+ unsigned Len = 0;
// The first instruction in this \p Candidate.
MachineBasicBlock::iterator FirstInst;
@@ -49,20 +50,20 @@
MachineBasicBlock::iterator LastInst;
// The basic block that contains this Candidate.
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
/// Cost of calling an outlined function from this point as defined by the
/// target.
- unsigned CallOverhead;
+ unsigned CallOverhead = 0;
public:
/// The index of this \p Candidate's \p OutlinedFunction in the list of
/// \p OutlinedFunctions.
- unsigned FunctionIdx;
+ unsigned FunctionIdx = 0;
/// Identifier denoting the instructions to emit to call an outlined function
/// from this point. Defined by the target.
- unsigned CallConstructionID;
+ unsigned CallConstructionID = 0;
/// Contains physical register liveness information for the MBB containing
/// this \p Candidate.
@@ -171,13 +172,13 @@
/// Represents the size of a sequence in bytes. (Some instructions vary
/// widely in size, so just counting the instructions isn't very useful.)
- unsigned SequenceSize;
+ unsigned SequenceSize = 0;
/// Target-defined overhead of constructing a frame for this function.
- unsigned FrameOverhead;
+ unsigned FrameOverhead = 0;
/// Target-defined identifier for constructing a frame for this function.
- unsigned FrameConstructionID;
+ unsigned FrameConstructionID = 0;
/// Return the number of candidates for this \p OutlinedFunction.
unsigned getOccurrenceCount() const { return Candidates.size(); }
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachinePassManager.h b/linux-x64/clang/include/llvm/CodeGen/MachinePassManager.h
new file mode 100644
index 0000000..1489177
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MachinePassManager.h
@@ -0,0 +1,256 @@
+//===- MachinePassManager.h - Pass management for CodeGen ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the pass manager interface for codegen. The codegen
+// pipeline consists only of machine function passes. There is no container
+// relationship between IR module/function and machine function in terms of pass
+// manager organization, so there is no need for adaptor classes (for example,
+// ModuleToMachineFunctionAdaptor). Since invalidation can only happen among
+// machine function passes, there are no proxy classes to handle cross-IR-unit
+// invalidation. IR analysis results are provided for machine function passes by
+// their respective analysis managers such as ModuleAnalysisManager and
+// FunctionAnalysisManager.
+//
+// TODO: Add MachineFunctionProperties support.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEPASSMANAGER_H
+#define LLVM_CODEGEN_MACHINEPASSMANAGER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/type_traits.h"
+
+namespace llvm {
+class Module;
+
+extern template class AnalysisManager<MachineFunction>;
+
+/// An AnalysisManager<MachineFunction> that also exposes IR analysis results.
+class MachineFunctionAnalysisManager : public AnalysisManager<MachineFunction> {
+public:
+ using Base = AnalysisManager<MachineFunction>;
+
+ MachineFunctionAnalysisManager() : Base(false), FAM(nullptr), MAM(nullptr) {}
+ MachineFunctionAnalysisManager(FunctionAnalysisManager &FAM,
+ ModuleAnalysisManager &MAM,
+ bool DebugLogging = false)
+ : Base(DebugLogging), FAM(&FAM), MAM(&MAM) {}
+ MachineFunctionAnalysisManager(MachineFunctionAnalysisManager &&) = default;
+ MachineFunctionAnalysisManager &
+ operator=(MachineFunctionAnalysisManager &&) = default;
+
+ /// Get the result of an analysis pass for a Function.
+ ///
+ /// Runs the analysis if a cached result is not available.
+ template <typename PassT> typename PassT::Result &getResult(Function &F) {
+ return FAM->getResult<PassT>(F);
+ }
+
+ /// Get the cached result of an analysis pass for a Function.
+ ///
+ /// This method never runs the analysis.
+ ///
+ /// \returns null if there is no cached result.
+ template <typename PassT>
+ typename PassT::Result *getCachedResult(Function &F) {
+ return FAM->getCachedResult<PassT>(F);
+ }
+
+ /// Get the result of an analysis pass for a Module.
+ ///
+ /// Runs the analysis if a cached result is not available.
+ template <typename PassT> typename PassT::Result &getResult(Module &M) {
+ return MAM->getResult<PassT>(M);
+ }
+
+ /// Get the cached result of an analysis pass for a Module.
+ ///
+ /// This method never runs the analysis.
+ ///
+ /// \returns null if there is no cached result.
+ template <typename PassT> typename PassT::Result *getCachedResult(Module &M) {
+ return MAM->getCachedResult<PassT>(M);
+ }
+
+ /// Get the result of an analysis pass for a MachineFunction.
+ ///
+ /// Runs the analysis if a cached result is not available.
+ using Base::getResult;
+
+ /// Get the cached result of an analysis pass for a MachineFunction.
+ ///
+ /// This method never runs the analysis.
+ ///
+ /// \returns null if there is no cached result.
+ using Base::getCachedResult;
+
+ // FIXME: Add LoopAnalysisManager or CGSCCAnalysisManager if needed.
+ FunctionAnalysisManager *FAM;
+ ModuleAnalysisManager *MAM;
+};
+
+extern template class PassManager<MachineFunction>;
+
+/// MachineFunctionPassManager adds/removes the features below to/from the base
+/// PassManager template instantiation.
+///
+/// - Support passes that implement doInitialization/doFinalization. This is for
+/// machine function passes to work on module-level constructs. One such pass
+/// is AsmPrinter.
+///
+/// - Support machine module passes, which run over the module (for example,
+/// MachineOutliner). A machine module pass needs to define the method:
+///
+/// ```Error run(Module &, MachineFunctionAnalysisManager &)```
+///
+/// FIXME: machine module passes still need to define the usual machine
+/// function pass interface, namely,
+/// `PreservedAnalyses run(MachineFunction &,
+/// MachineFunctionAnalysisManager &)`
+/// But this interface is never executed. It is just a placeholder
+/// to satisfy the pass manager's type-erased interface. This
+/// special-casing of machine module passes is due to their limited use
+/// cases and the unnecessary complexity they may bring to the machine
+/// pass manager.
+///
+/// - The base class `run` method is replaced by an alternative `run` method.
+/// See details below.
+///
+/// - Support code generation in SCC order. Users include interprocedural
+/// register allocation (IPRA).
+class MachineFunctionPassManager
+ : public PassManager<MachineFunction, MachineFunctionAnalysisManager> {
+ using Base = PassManager<MachineFunction, MachineFunctionAnalysisManager>;
+
+public:
+ MachineFunctionPassManager(bool DebugLogging = false,
+ bool RequireCodeGenSCCOrder = false,
+ bool VerifyMachineFunction = false)
+ : Base(DebugLogging), RequireCodeGenSCCOrder(RequireCodeGenSCCOrder),
+ VerifyMachineFunction(VerifyMachineFunction) {}
+ MachineFunctionPassManager(MachineFunctionPassManager &&) = default;
+ MachineFunctionPassManager &
+ operator=(MachineFunctionPassManager &&) = default;
+
+ /// Run machine passes for a Module.
+ ///
+ /// The intended use is to start the codegen pipeline for a Module. The base
+/// class's `run` method is deliberately hidden by this one because we do not
+/// yet have use cases for composing two instances of machine pass managers,
+/// or for composing machine pass managers with other types of pass managers.
+ Error run(Module &M, MachineFunctionAnalysisManager &MFAM);
+
+ template <typename PassT> void addPass(PassT &&Pass) {
+ Base::addPass(std::forward<PassT>(Pass));
+ PassConceptT *P = Passes.back().get();
+ addDoInitialization<PassT>(P);
+ addDoFinalization<PassT>(P);
+
+ // Add machine module pass.
+ addRunOnModule<PassT>(P);
+ }
+
+private:
+ template <typename PassT>
+ using has_init_t = decltype(std::declval<PassT &>().doInitialization(
+ std::declval<Module &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<has_init_t, PassT>::value>
+ addDoInitialization(PassConceptT *Pass) {}
+
+ template <typename PassT>
+ std::enable_if_t<is_detected<has_init_t, PassT>::value>
+ addDoInitialization(PassConceptT *Pass) {
+ using PassModelT =
+ detail::PassModel<MachineFunction, PassT, PreservedAnalyses,
+ MachineFunctionAnalysisManager>;
+ auto *P = static_cast<PassModelT *>(Pass);
+ InitializationFuncs.emplace_back(
+ [=](Module &M, MachineFunctionAnalysisManager &MFAM) {
+ return P->Pass.doInitialization(M, MFAM);
+ });
+ }
+
+ template <typename PassT>
+ using has_fini_t = decltype(std::declval<PassT &>().doFinalization(
+ std::declval<Module &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<has_fini_t, PassT>::value>
+ addDoFinalization(PassConceptT *Pass) {}
+
+ template <typename PassT>
+ std::enable_if_t<is_detected<has_fini_t, PassT>::value>
+ addDoFinalization(PassConceptT *Pass) {
+ using PassModelT =
+ detail::PassModel<MachineFunction, PassT, PreservedAnalyses,
+ MachineFunctionAnalysisManager>;
+ auto *P = static_cast<PassModelT *>(Pass);
+ FinalizationFuncs.emplace_back(
+ [=](Module &M, MachineFunctionAnalysisManager &MFAM) {
+ return P->Pass.doFinalization(M, MFAM);
+ });
+ }
+
+ template <typename PassT>
+ using is_machine_module_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<Module &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ using is_machine_function_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<MachineFunction &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<is_machine_module_pass_t, PassT>::value>
+ addRunOnModule(PassConceptT *Pass) {}
+
+ template <typename PassT>
+ std::enable_if_t<is_detected<is_machine_module_pass_t, PassT>::value>
+ addRunOnModule(PassConceptT *Pass) {
+ static_assert(is_detected<is_machine_function_pass_t, PassT>::value,
+ "machine module pass needs to define machine function pass "
+ "api. sorry.");
+
+ using PassModelT =
+ detail::PassModel<MachineFunction, PassT, PreservedAnalyses,
+ MachineFunctionAnalysisManager>;
+ auto *P = static_cast<PassModelT *>(Pass);
+ MachineModulePasses.emplace(
+ Passes.size() - 1,
+ [=](Module &M, MachineFunctionAnalysisManager &MFAM) {
+ return P->Pass.run(M, MFAM);
+ });
+ }
+
+ using FuncTy = Error(Module &, MachineFunctionAnalysisManager &);
+ SmallVector<llvm::unique_function<FuncTy>, 4> InitializationFuncs;
+ SmallVector<llvm::unique_function<FuncTy>, 4> FinalizationFuncs;
+
+ using PassIndex = decltype(Passes)::size_type;
+ std::map<PassIndex, llvm::unique_function<FuncTy>> MachineModulePasses;
+
+ // Run codegen in the SCC order.
+ bool RequireCodeGenSCCOrder;
+
+ bool VerifyMachineFunction;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEPASSMANAGER_H
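
To make the machine module pass contract concrete, a minimal hypothetical sketch (the pass name and bodies are invented; only the two `run` signatures matter):

#include "llvm/CodeGen/MachinePassManager.h"
using namespace llvm;

struct ToyMachineModulePass : PassInfoMixin<ToyMachineModulePass> {
  // Module-level entry point; this is what the pass manager executes.
  Error run(Module &M, MachineFunctionAnalysisManager &MFAM) {
    return Error::success();
  }
  // Placeholder required by the type-erased interface; never executed.
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
    llvm_unreachable("machine module pass placeholder");
  }
};

// Usage, assuming MFAM was constructed from a FunctionAnalysisManager and
// a ModuleAnalysisManager as shown above:
//   MachineFunctionPassManager MFPM;
//   MFPM.addPass(ToyMachineModulePass());
//   if (Error E = MFPM.run(M, MFAM)) { /* handle the error */ }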
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachinePassRegistry.def b/linux-x64/clang/include/llvm/CodeGen/MachinePassRegistry.def
new file mode 100644
index 0000000..e9eaa5f
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MachinePassRegistry.def
@@ -0,0 +1,197 @@
+//===- MachinePassRegistry.def - Registry of passes -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is the registry of passes for the target-independent
+// code generator.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ())
+#undef MODULE_PASS
+
+#ifndef FUNCTION_ANALYSIS
+#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, (std::move(TM.getTargetIRAnalysis())))
+#undef FUNCTION_ANALYSIS
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
+FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
+FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ())
+FUNCTION_PASS("consthoist", ConstantHoistingPass, ())
+FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
+FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
+FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
+FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
+FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
+FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ())
+FUNCTION_PASS("verify", VerifierPass, ())
+#undef FUNCTION_PASS
+
+#ifndef LOOP_PASS
+#define LOOP_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+LOOP_PASS("loop-reduce", LoopStrengthReducePass, ())
+#undef LOOP_PASS
+
+#ifndef MACHINE_MODULE_PASS
+#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+#undef MACHINE_MODULE_PASS
+
+#ifndef MACHINE_FUNCTION_ANALYSIS
+#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+// LiveVariables currently requires pure SSA form.
+// FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+// LiveVariables can be removed completely, and LiveIntervals can be directly
+// computed. (We still either need to regenerate kill flags after regalloc, or
+// preferably fix the scavenger to not depend on them).
+// MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis())
+
+// MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksPass())
+// MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", LazyMachineBlockFrequencyInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-bfi", MachineBlockFrequencyInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-dom-frontier", MachineDominanceFrontierAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-ore", MachineOptimizationRemarkEmitterPassAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", MachinePostDominatorTreeAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-region-info", MachineRegionInfoPassAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysisAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("gc-analysis", GCMachineCodeAnalysisPass())
+#undef MACHINE_FUNCTION_ANALYSIS
+
+#ifndef MACHINE_FUNCTION_PASS
+#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+// MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ())
+// MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ())
+#undef MACHINE_FUNCTION_PASS
+
+// After a pass is converted to the new pass manager, its entry should be moved
+// from the dummy table to the normal one. For example, a machine function pass
+// moves from DUMMY_MACHINE_FUNCTION_PASS to MACHINE_FUNCTION_PASS.
+
+#ifndef DUMMY_FUNCTION_PASS
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ())
+DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ())
+DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ())
+DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ())
+DUMMY_FUNCTION_PASS("dwarfehprepare", DwarfEHPass, ())
+DUMMY_FUNCTION_PASS("winehprepare", WinEHPass, ())
+DUMMY_FUNCTION_PASS("wasmehprepare", WasmEHPass, ())
+DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ())
+DUMMY_FUNCTION_PASS("safe-stack", SafeStackPass, ())
+DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ())
+DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ())
+DUMMY_FUNCTION_PASS("interleaved-access", InterleavedAccessPass, ())
+DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ())
+DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ())
+DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ())
+DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ())
+#undef DUMMY_FUNCTION_PASS
+
+#ifndef DUMMY_MODULE_PASS
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MODULE_PASS("lower-emutls", LowerEmuTLSPass, ())
+#undef DUMMY_MODULE_PASS
+
+#ifndef DUMMY_MACHINE_MODULE_PASS
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass, ())
+#undef DUMMY_MACHINE_MODULE_PASS
+
+#ifndef DUMMY_MACHINE_FUNCTION_PASS
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ())
+DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-sink", MachineSinkingPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("twoaddressinstruction", TwoAddressInstructionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("liveintervals", LiveIntervalsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass, ())
+DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-fast", RAFastPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ())
+#undef DUMMY_MACHINE_FUNCTION_PASS
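
These X-macro tables are consumed by defining the macro of interest before including the file; any macro left undefined expands to nothing, and each table #undefs its macro after use. A hypothetical consumer:

// Print the name of every machine function pass known to the registry.
#include "llvm/Support/raw_ostream.h"
#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)                    \
  llvm::errs() << NAME << "\n";
#include "llvm/CodeGen/MachinePassRegistry.def"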
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachinePipeliner.h b/linux-x64/clang/include/llvm/CodeGen/MachinePipeliner.h
index 03ca530..f89a453 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachinePipeliner.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachinePipeliner.h
@@ -41,12 +41,15 @@
#define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
namespace llvm {
+class AAResults;
class NodeSet;
class SMSchedule;
@@ -57,6 +60,7 @@
class MachinePipeliner : public MachineFunctionPass {
public:
MachineFunction *MF = nullptr;
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
const MachineLoopInfo *MLI = nullptr;
const MachineDominatorTree *MDT = nullptr;
const InstrItineraryData *InstrItins;
@@ -87,14 +91,7 @@
bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
void preprocessPhiNodes(MachineBasicBlock &B);
@@ -148,7 +145,7 @@
/// We may create a new instruction, so remember it because it
/// must be deleted when the pass is finished.
- SmallPtrSet<MachineInstr *, 4> NewMIs;
+ DenseMap<MachineInstr*, MachineInstr *> NewMIs;
/// Ordered list of DAG postprocessing steps.
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
@@ -200,7 +197,7 @@
RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
P.MF->getSubtarget().getSMSMutations(Mutations);
if (SwpEnableCopyToPhi)
- Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
+ Mutations.push_back(std::make_unique<CopyToPhiMutation>());
}
void schedule() override;
@@ -279,7 +276,7 @@
static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
private:
- void addLoopCarriedDependences(AliasAnalysis *AA);
+ void addLoopCarriedDependences(AAResults *AA);
void updatePhiDependences();
void changeDependences();
unsigned calculateResMII();
@@ -297,53 +294,8 @@
void computeNodeOrder(NodeSetType &NodeSets);
void checkValidNodeOrder(const NodeSetType &Circuits) const;
bool schedulePipeline(SMSchedule &Schedule);
- void generatePipelinedLoop(SMSchedule &Schedule);
- void generateProlog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &PrologBBs);
- void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
- void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void removeDeadInstructions(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs);
- void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule);
- void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
- MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule, ValueMapTy *VRMap);
bool computeDelta(MachineInstr &MI, unsigned &Delta);
- void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
- unsigned Num);
- MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum);
- MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum,
- SMSchedule &Schedule);
- void updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum, unsigned InstrStageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap);
- MachineInstr *findDefInLoop(unsigned Reg);
- unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
- unsigned LoopStage, ValueMapTy *VRMap,
- MachineBasicBlock *BB);
- void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap);
- void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
- InstrMapTy &InstrMap, unsigned CurStageNum,
- unsigned PhiNum, MachineInstr *Phi,
- unsigned OldReg, unsigned NewReg,
- unsigned PrevReg = 0);
+ MachineInstr *findDefInLoop(Register Reg);
bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
unsigned &OffsetPos, unsigned &NewBase,
int64_t &NewOffset);
@@ -372,10 +324,22 @@
NodeSet() = default;
NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
Latency = 0;
- for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
- for (const SDep &Succ : Nodes[i]->Succs)
- if (Nodes.count(Succ.getSUnit()))
- Latency += Succ.getLatency();
+ for (unsigned i = 0, e = Nodes.size(); i < e; ++i) {
+ DenseMap<SUnit *, unsigned> SuccSUnitLatency;
+ for (const SDep &Succ : Nodes[i]->Succs) {
+ auto SuccSUnit = Succ.getSUnit();
+ if (!Nodes.count(SuccSUnit))
+ continue;
+ unsigned CurLatency = Succ.getLatency();
+ unsigned MaxLatency = 0;
+ if (SuccSUnitLatency.count(SuccSUnit))
+ MaxLatency = SuccSUnitLatency[SuccSUnit];
+ if (CurLatency > MaxLatency)
+ SuccSUnitLatency[SuccSUnit] = CurLatency;
+ }
+ for (auto SUnitLatency : SuccSUnitLatency)
+ Latency += SUnitLatency.second;
+ }
}
bool insert(SUnit *SU) { return Nodes.insert(SU); }
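
A worked example of the latency change in this constructor: if a node has two in-set edges to the same successor with latencies 2 and 3, the removed code added 5 to Latency, whereas the per-successor map now keeps only the maximum and adds 3.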
@@ -529,12 +493,6 @@
/// Map from instruction to execution cycle.
std::map<SUnit *, int> InstrToCycle;
- /// Map for each register and the max difference between its uses and def.
- /// The first element in the pair is the max difference in stages. The
- /// second is true if the register defines a Phi value and loop value is
- /// scheduled before the Phi.
- std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
-
/// Keep track of the first cycle value in the schedule. It starts
/// as zero, but the algorithm allows negative values.
int FirstCycle = 0;
@@ -560,7 +518,6 @@
void reset() {
ScheduledInstrs.clear();
InstrToCycle.clear();
- RegToStageDiff.clear();
FirstCycle = 0;
LastCycle = 0;
InitiationInterval = 0;
@@ -620,28 +577,6 @@
return (LastCycle - FirstCycle) / InitiationInterval;
}
- /// Return the max. number of stages/iterations that can occur between a
- /// register definition and its uses.
- unsigned getStagesForReg(int Reg, unsigned CurStage) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
- return 1;
- return Stages.first;
- }
-
- /// The number of stages for a Phi is a little different than other
- /// instructions. The minimum value computed in RegToStageDiff is 1
- /// because we assume the Phi is needed for at least 1 iteration.
- /// This is not the case if the loop value is scheduled prior to the
- /// Phi in the same stage. This function returns the number of stages
- /// or iterations needed between the Phi definition and any uses.
- unsigned getStagesForPhi(int Reg) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (Stages.second)
- return Stages.first;
- return Stages.first - 1;
- }
-
/// Return the instructions that are scheduled at the specified cycle.
std::deque<SUnit *> &getInstructions(int cycle) {
return ScheduledInstrs[cycle];
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachinePostDominators.h b/linux-x64/clang/include/llvm/CodeGen/MachinePostDominators.h
index b67e6b5..cee4294 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachinePostDominators.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachinePostDominators.h
@@ -16,68 +16,78 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <memory>
namespace llvm {
///
-/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
-/// to compute the post-dominator tree.
+/// MachinePostDominatorTree - an analysis pass wrapper for DominatorTree
+/// used to compute the post-dominator tree for MachineFunctions.
///
-struct MachinePostDominatorTree : public MachineFunctionPass {
-private:
- PostDomTreeBase<MachineBasicBlock> *DT;
+class MachinePostDominatorTree : public MachineFunctionPass {
+ using PostDomTreeT = PostDomTreeBase<MachineBasicBlock>;
+ std::unique_ptr<PostDomTreeT> PDT;
public:
static char ID;
MachinePostDominatorTree();
- ~MachinePostDominatorTree() override;
+ PostDomTreeT &getBase() {
+ if (!PDT)
+ PDT.reset(new PostDomTreeT());
+ return *PDT;
+ }
FunctionPass *createMachinePostDominatorTreePass();
- const SmallVectorImpl<MachineBasicBlock *> &getRoots() const {
- return DT->getRoots();
- }
-
- MachineDomTreeNode *getRootNode() const {
- return DT->getRootNode();
- }
+ MachineDomTreeNode *getRootNode() const { return PDT->getRootNode(); }
MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
- return DT->getNode(BB);
+ return PDT->getNode(BB);
}
MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
- return DT->getNode(BB);
+ return PDT->getNode(BB);
}
bool dominates(const MachineDomTreeNode *A,
const MachineDomTreeNode *B) const {
- return DT->dominates(A, B);
+ return PDT->dominates(A, B);
}
bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
- return DT->dominates(A, B);
+ return PDT->dominates(A, B);
}
bool properlyDominates(const MachineDomTreeNode *A,
const MachineDomTreeNode *B) const {
- return DT->properlyDominates(A, B);
+ return PDT->properlyDominates(A, B);
}
bool properlyDominates(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
- return DT->properlyDominates(A, B);
+ return PDT->properlyDominates(A, B);
+ }
+
+ bool isVirtualRoot(const MachineDomTreeNode *Node) const {
+ return PDT->isVirtualRoot(Node);
}
MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
- MachineBasicBlock *B) {
- return DT->findNearestCommonDominator(A, B);
+ MachineBasicBlock *B) const {
+ return PDT->findNearestCommonDominator(A, B);
}
+ /// Returns the nearest common dominator of the given blocks.
+ /// If that tree node is the virtual root, nullptr is returned.
+ MachineBasicBlock *
+ findNearestCommonDominator(ArrayRef<MachineBasicBlock *> Blocks) const;
+
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override { PDT.reset(nullptr); }
+ void verifyAnalysis() const override;
void print(llvm::raw_ostream &OS, const Module *M = nullptr) const override;
};
} //end of namespace llvm
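
A brief usage sketch of the new ArrayRef overload inside a hypothetical MachineFunctionPass (the blocks MBB1..MBB3, the helper, and the analysis-usage boilerplate are assumed):

// Requires AU.addRequired<MachinePostDominatorTree>() in getAnalysisUsage.
MachinePostDominatorTree &MPDT = getAnalysis<MachinePostDominatorTree>();
// Returns nullptr when the nearest common dominator is the virtual root.
if (MachineBasicBlock *NCD =
        MPDT.findNearestCommonDominator({MBB1, MBB2, MBB3}))
  doSomethingWith(NCD); // hypothetical helper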
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineRegionInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineRegionInfo.h
index 6d9fb9b..eeb69fe 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineRegionInfo.h
@@ -22,7 +22,7 @@
namespace llvm {
-struct MachinePostDominatorTree;
+class MachinePostDominatorTree;
class MachineRegion;
class MachineRegionNode;
class MachineRegionInfo;
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineRegisterInfo.h b/linux-x64/clang/include/llvm/CodeGen/MachineRegisterInfo.h
index b5deed1..a1a6705 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -57,7 +57,7 @@
public:
virtual ~Delegate() = default;
- virtual void MRI_NoteNewVirtualRegister(unsigned Reg) = 0;
+ virtual void MRI_NoteNewVirtualRegister(Register Reg) = 0;
};
private:
@@ -98,7 +98,7 @@
/// first member of the pair being non-zero. If the hinted register is
/// virtual, it means the allocator should prefer the physical register
/// allocated to it if any.
- IndexedMap<std::pair<unsigned, SmallVector<unsigned, 4>>,
+ IndexedMap<std::pair<Register, SmallVector<Register, 4>>,
VirtReg2IndexFunctor> RegAllocHints;
/// PhysRegUseDefLists - This is an array of the head of the use/def list for
@@ -107,16 +107,16 @@
/// getRegUseDefListHead - Return the head pointer for the register use/def
/// list for the specified virtual or physical register.
- MachineOperand *&getRegUseDefListHead(unsigned RegNo) {
- if (TargetRegisterInfo::isVirtualRegister(RegNo))
- return VRegInfo[RegNo].second;
- return PhysRegUseDefLists[RegNo];
+ MachineOperand *&getRegUseDefListHead(Register RegNo) {
+ if (RegNo.isVirtual())
+ return VRegInfo[RegNo.id()].second;
+ return PhysRegUseDefLists[RegNo.id()];
}
- MachineOperand *getRegUseDefListHead(unsigned RegNo) const {
- if (TargetRegisterInfo::isVirtualRegister(RegNo))
- return VRegInfo[RegNo].second;
- return PhysRegUseDefLists[RegNo];
+ MachineOperand *getRegUseDefListHead(Register RegNo) const {
+ if (RegNo.isVirtual())
+ return VRegInfo[RegNo.id()].second;
+ return PhysRegUseDefLists[RegNo.id()];
}
/// Get the next element in the use-def chain.
@@ -143,7 +143,7 @@
/// Live in values are typically arguments in registers. LiveIn values are
/// allowed to have virtual registers associated with them, stored in the
/// second element.
- std::vector<std::pair<unsigned, unsigned>> LiveIns;
+ std::vector<std::pair<MCRegister, Register>> LiveIns;
public:
explicit MachineRegisterInfo(MachineFunction *MF);
@@ -214,8 +214,8 @@
bool shouldTrackSubRegLiveness(const TargetRegisterClass &RC) const {
return subRegLivenessEnabled() && RC.HasDisjunctSubRegs;
}
- bool shouldTrackSubRegLiveness(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Must pass a VReg");
+ bool shouldTrackSubRegLiveness(Register VReg) const {
+ assert(VReg.isVirtual() && "Must pass a VReg");
return shouldTrackSubRegLiveness(*getRegClass(VReg));
}
bool subRegLivenessEnabled() const {
@@ -232,7 +232,7 @@
/// Disables the register from the list of CSRs.
/// I.e. the register will not appear as part of the CSR mask.
/// \see UpdatedCalleeSavedRegs.
- void disableCalleeSavedRegister(unsigned Reg);
+ void disableCalleeSavedRegister(MCRegister Reg);
/// Returns list of callee saved registers.
/// The function returns the updated CSR list (after taking into account
@@ -253,7 +253,7 @@
void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps);
/// Verify the sanity of the use list for Reg.
- void verifyUseList(unsigned Reg) const;
+ void verifyUseList(Register Reg) const;
/// Verify the use list of all registers.
void verifyUseLists() const;
@@ -278,12 +278,12 @@
/// register.
using reg_iterator =
defusechain_iterator<true, true, false, true, false, false>;
- reg_iterator reg_begin(unsigned RegNo) const {
+ reg_iterator reg_begin(Register RegNo) const {
return reg_iterator(getRegUseDefListHead(RegNo));
}
static reg_iterator reg_end() { return reg_iterator(nullptr); }
- inline iterator_range<reg_iterator> reg_operands(unsigned Reg) const {
+ inline iterator_range<reg_iterator> reg_operands(Register Reg) const {
return make_range(reg_begin(Reg), reg_end());
}
@@ -291,7 +291,7 @@
/// of the specified register, stepping by MachineInstr.
using reg_instr_iterator =
defusechain_instr_iterator<true, true, false, false, true, false>;
- reg_instr_iterator reg_instr_begin(unsigned RegNo) const {
+ reg_instr_iterator reg_instr_begin(Register RegNo) const {
return reg_instr_iterator(getRegUseDefListHead(RegNo));
}
static reg_instr_iterator reg_instr_end() {
@@ -299,7 +299,7 @@
}
inline iterator_range<reg_instr_iterator>
- reg_instructions(unsigned Reg) const {
+ reg_instructions(Register Reg) const {
return make_range(reg_instr_begin(Reg), reg_instr_end());
}
@@ -307,26 +307,26 @@
/// of the specified register, stepping by bundle.
using reg_bundle_iterator =
defusechain_instr_iterator<true, true, false, false, false, true>;
- reg_bundle_iterator reg_bundle_begin(unsigned RegNo) const {
+ reg_bundle_iterator reg_bundle_begin(Register RegNo) const {
return reg_bundle_iterator(getRegUseDefListHead(RegNo));
}
static reg_bundle_iterator reg_bundle_end() {
return reg_bundle_iterator(nullptr);
}
- inline iterator_range<reg_bundle_iterator> reg_bundles(unsigned Reg) const {
+ inline iterator_range<reg_bundle_iterator> reg_bundles(Register Reg) const {
return make_range(reg_bundle_begin(Reg), reg_bundle_end());
}
/// reg_empty - Return true if there are no instructions using or defining the
/// specified register (it may be live-in).
- bool reg_empty(unsigned RegNo) const { return reg_begin(RegNo) == reg_end(); }
+ bool reg_empty(Register RegNo) const { return reg_begin(RegNo) == reg_end(); }
/// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses
/// of the specified register, skipping those marked as Debug.
using reg_nodbg_iterator =
defusechain_iterator<true, true, true, true, false, false>;
- reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const {
+ reg_nodbg_iterator reg_nodbg_begin(Register RegNo) const {
return reg_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static reg_nodbg_iterator reg_nodbg_end() {
@@ -334,7 +334,7 @@
}
inline iterator_range<reg_nodbg_iterator>
- reg_nodbg_operands(unsigned Reg) const {
+ reg_nodbg_operands(Register Reg) const {
return make_range(reg_nodbg_begin(Reg), reg_nodbg_end());
}
@@ -343,7 +343,7 @@
/// skipping those marked as Debug.
using reg_instr_nodbg_iterator =
defusechain_instr_iterator<true, true, true, false, true, false>;
- reg_instr_nodbg_iterator reg_instr_nodbg_begin(unsigned RegNo) const {
+ reg_instr_nodbg_iterator reg_instr_nodbg_begin(Register RegNo) const {
return reg_instr_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static reg_instr_nodbg_iterator reg_instr_nodbg_end() {
@@ -351,7 +351,7 @@
}
inline iterator_range<reg_instr_nodbg_iterator>
- reg_nodbg_instructions(unsigned Reg) const {
+ reg_nodbg_instructions(Register Reg) const {
return make_range(reg_instr_nodbg_begin(Reg), reg_instr_nodbg_end());
}
@@ -360,7 +360,7 @@
/// skipping those marked as Debug.
using reg_bundle_nodbg_iterator =
defusechain_instr_iterator<true, true, true, false, false, true>;
- reg_bundle_nodbg_iterator reg_bundle_nodbg_begin(unsigned RegNo) const {
+ reg_bundle_nodbg_iterator reg_bundle_nodbg_begin(Register RegNo) const {
return reg_bundle_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static reg_bundle_nodbg_iterator reg_bundle_nodbg_end() {
@@ -368,25 +368,25 @@
}
inline iterator_range<reg_bundle_nodbg_iterator>
- reg_nodbg_bundles(unsigned Reg) const {
+ reg_nodbg_bundles(Register Reg) const {
return make_range(reg_bundle_nodbg_begin(Reg), reg_bundle_nodbg_end());
}
/// reg_nodbg_empty - Return true if the only instructions using or defining
/// Reg are Debug instructions.
- bool reg_nodbg_empty(unsigned RegNo) const {
+ bool reg_nodbg_empty(Register RegNo) const {
return reg_nodbg_begin(RegNo) == reg_nodbg_end();
}
/// def_iterator/def_begin/def_end - Walk all defs of the specified register.
using def_iterator =
defusechain_iterator<false, true, false, true, false, false>;
- def_iterator def_begin(unsigned RegNo) const {
+ def_iterator def_begin(Register RegNo) const {
return def_iterator(getRegUseDefListHead(RegNo));
}
static def_iterator def_end() { return def_iterator(nullptr); }
- inline iterator_range<def_iterator> def_operands(unsigned Reg) const {
+ inline iterator_range<def_iterator> def_operands(Register Reg) const {
return make_range(def_begin(Reg), def_end());
}
@@ -394,7 +394,7 @@
/// specified register, stepping by MachineInst.
using def_instr_iterator =
defusechain_instr_iterator<false, true, false, false, true, false>;
- def_instr_iterator def_instr_begin(unsigned RegNo) const {
+ def_instr_iterator def_instr_begin(Register RegNo) const {
return def_instr_iterator(getRegUseDefListHead(RegNo));
}
static def_instr_iterator def_instr_end() {
@@ -402,7 +402,7 @@
}
inline iterator_range<def_instr_iterator>
- def_instructions(unsigned Reg) const {
+ def_instructions(Register Reg) const {
return make_range(def_instr_begin(Reg), def_instr_end());
}
@@ -410,26 +410,26 @@
/// specified register, stepping by bundle.
using def_bundle_iterator =
defusechain_instr_iterator<false, true, false, false, false, true>;
- def_bundle_iterator def_bundle_begin(unsigned RegNo) const {
+ def_bundle_iterator def_bundle_begin(Register RegNo) const {
return def_bundle_iterator(getRegUseDefListHead(RegNo));
}
static def_bundle_iterator def_bundle_end() {
return def_bundle_iterator(nullptr);
}
- inline iterator_range<def_bundle_iterator> def_bundles(unsigned Reg) const {
+ inline iterator_range<def_bundle_iterator> def_bundles(Register Reg) const {
return make_range(def_bundle_begin(Reg), def_bundle_end());
}
/// def_empty - Return true if there are no instructions defining the
/// specified register (it may be live-in).
- bool def_empty(unsigned RegNo) const { return def_begin(RegNo) == def_end(); }
+ bool def_empty(Register RegNo) const { return def_begin(RegNo) == def_end(); }
- StringRef getVRegName(unsigned Reg) const {
+ StringRef getVRegName(Register Reg) const {
return VReg2Name.inBounds(Reg) ? StringRef(VReg2Name[Reg]) : "";
}
- void insertVRegByName(StringRef Name, unsigned Reg) {
+ void insertVRegByName(StringRef Name, Register Reg) {
assert((Name.empty() || VRegNames.find(Name) == VRegNames.end()) &&
"Named VRegs Must be Unique.");
if (!Name.empty()) {
@@ -441,22 +441,35 @@
/// Return true if there is exactly one operand defining the specified
/// register.
- bool hasOneDef(unsigned RegNo) const {
+ bool hasOneDef(Register RegNo) const {
def_iterator DI = def_begin(RegNo);
if (DI == def_end())
return false;
return ++DI == def_end();
}
+ /// Returns the defining operand if there is exactly one operand defining the
+ /// specified register, otherwise nullptr.
+ MachineOperand *getOneDef(Register Reg) const {
+ def_iterator DI = def_begin(Reg);
+ if (DI == def_end()) // No defs.
+ return nullptr;
+
+ def_iterator OneDef = DI;
+ if (++DI == def_end())
+ return &*OneDef;
+ return nullptr; // Multiple defs.
+ }
+
/// use_iterator/use_begin/use_end - Walk all uses of the specified register.
using use_iterator =
defusechain_iterator<true, false, false, true, false, false>;
- use_iterator use_begin(unsigned RegNo) const {
+ use_iterator use_begin(Register RegNo) const {
return use_iterator(getRegUseDefListHead(RegNo));
}
static use_iterator use_end() { return use_iterator(nullptr); }
- inline iterator_range<use_iterator> use_operands(unsigned Reg) const {
+ inline iterator_range<use_iterator> use_operands(Register Reg) const {
return make_range(use_begin(Reg), use_end());
}
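
A small sketch of the new getOneDef helper; `MRI` and `Reg` are assumed from surrounding pass context:

// Look through a full COPY when the virtual register has exactly one def.
if (MachineOperand *DefMO = MRI.getOneDef(Reg)) {
  MachineInstr *DefMI = DefMO->getParent();
  if (DefMI->isFullCopy())
    Reg = DefMI->getOperand(1).getReg();
}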
@@ -464,7 +477,7 @@
/// specified register, stepping by MachineInstr.
using use_instr_iterator =
defusechain_instr_iterator<true, false, false, false, true, false>;
- use_instr_iterator use_instr_begin(unsigned RegNo) const {
+ use_instr_iterator use_instr_begin(Register RegNo) const {
return use_instr_iterator(getRegUseDefListHead(RegNo));
}
static use_instr_iterator use_instr_end() {
@@ -472,7 +485,7 @@
}
inline iterator_range<use_instr_iterator>
- use_instructions(unsigned Reg) const {
+ use_instructions(Register Reg) const {
return make_range(use_instr_begin(Reg), use_instr_end());
}
@@ -480,24 +493,24 @@
/// specified register, stepping by bundle.
using use_bundle_iterator =
defusechain_instr_iterator<true, false, false, false, false, true>;
- use_bundle_iterator use_bundle_begin(unsigned RegNo) const {
+ use_bundle_iterator use_bundle_begin(Register RegNo) const {
return use_bundle_iterator(getRegUseDefListHead(RegNo));
}
static use_bundle_iterator use_bundle_end() {
return use_bundle_iterator(nullptr);
}
- inline iterator_range<use_bundle_iterator> use_bundles(unsigned Reg) const {
+ inline iterator_range<use_bundle_iterator> use_bundles(Register Reg) const {
return make_range(use_bundle_begin(Reg), use_bundle_end());
}
/// use_empty - Return true if there are no instructions using the specified
/// register.
- bool use_empty(unsigned RegNo) const { return use_begin(RegNo) == use_end(); }
+ bool use_empty(Register RegNo) const { return use_begin(RegNo) == use_end(); }
/// hasOneUse - Return true if there is exactly one instruction using the
/// specified register.
- bool hasOneUse(unsigned RegNo) const {
+ bool hasOneUse(Register RegNo) const {
use_iterator UI = use_begin(RegNo);
if (UI == use_end())
return false;
@@ -508,7 +521,7 @@
/// specified register, skipping those marked as Debug.
using use_nodbg_iterator =
defusechain_iterator<true, false, true, true, false, false>;
- use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const {
+ use_nodbg_iterator use_nodbg_begin(Register RegNo) const {
return use_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static use_nodbg_iterator use_nodbg_end() {
@@ -516,7 +529,7 @@
}
inline iterator_range<use_nodbg_iterator>
- use_nodbg_operands(unsigned Reg) const {
+ use_nodbg_operands(Register Reg) const {
return make_range(use_nodbg_begin(Reg), use_nodbg_end());
}
@@ -525,7 +538,7 @@
/// those marked as Debug.
using use_instr_nodbg_iterator =
defusechain_instr_iterator<true, false, true, false, true, false>;
- use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const {
+ use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const {
return use_instr_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static use_instr_nodbg_iterator use_instr_nodbg_end() {
@@ -533,7 +546,7 @@
}
inline iterator_range<use_instr_nodbg_iterator>
- use_nodbg_instructions(unsigned Reg) const {
+ use_nodbg_instructions(Register Reg) const {
return make_range(use_instr_nodbg_begin(Reg), use_instr_nodbg_end());
}
@@ -542,7 +555,7 @@
/// those marked as Debug.
using use_bundle_nodbg_iterator =
defusechain_instr_iterator<true, false, true, false, false, true>;
- use_bundle_nodbg_iterator use_bundle_nodbg_begin(unsigned RegNo) const {
+ use_bundle_nodbg_iterator use_bundle_nodbg_begin(Register RegNo) const {
return use_bundle_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static use_bundle_nodbg_iterator use_bundle_nodbg_end() {
@@ -550,25 +563,25 @@
}
inline iterator_range<use_bundle_nodbg_iterator>
- use_nodbg_bundles(unsigned Reg) const {
+ use_nodbg_bundles(Register Reg) const {
return make_range(use_bundle_nodbg_begin(Reg), use_bundle_nodbg_end());
}
/// use_nodbg_empty - Return true if there are no non-Debug instructions
/// using the specified register.
- bool use_nodbg_empty(unsigned RegNo) const {
+ bool use_nodbg_empty(Register RegNo) const {
return use_nodbg_begin(RegNo) == use_nodbg_end();
}
/// hasOneNonDBGUse - Return true if there is exactly one non-Debug
/// use of the specified register.
- bool hasOneNonDBGUse(unsigned RegNo) const;
+ bool hasOneNonDBGUse(Register RegNo) const;
/// hasOneNonDBGUser - Return true if there is exactly one non-Debug
/// instruction using the specified register. Said instruction may have
/// multiple uses.
- bool hasOneNonDBGUser(unsigned RegNo) const;
-
+ bool hasOneNonDBGUser(Register RegNo) const;
+
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -588,38 +601,34 @@
/// Note that if ToReg is a physical register the function will replace and
/// apply sub registers to ToReg in order to obtain a final/proper physical
/// register.
- void replaceRegWith(unsigned FromReg, unsigned ToReg);
+ void replaceRegWith(Register FromReg, Register ToReg);
/// getVRegDef - Return the machine instr that defines the specified virtual
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
- MachineInstr *getVRegDef(unsigned Reg) const;
+ MachineInstr *getVRegDef(Register Reg) const;
/// getUniqueVRegDef - Return the unique machine instr that defines the
/// specified virtual register or null if none is found. If there are
/// multiple definitions or no definition, return null.
- MachineInstr *getUniqueVRegDef(unsigned Reg) const;
+ MachineInstr *getUniqueVRegDef(Register Reg) const;
/// clearKillFlags - Iterate over all the uses of the given register and
/// clear the kill flag from the MachineOperand. This function is used by
/// optimization passes which extend register lifetimes and need only
/// preserve conservative kill flag information.
- void clearKillFlags(unsigned Reg) const;
+ void clearKillFlags(Register Reg) const;
- void dumpUses(unsigned RegNo) const;
+ void dumpUses(Register RegNo) const;
/// Returns true if PhysReg is unallocatable and constant throughout the
/// function. Writing to a constant register has no effect.
- bool isConstantPhysReg(unsigned PhysReg) const;
-
- /// Returns true if either isConstantPhysReg or TRI->isCallerPreservedPhysReg
- /// returns true. This is a utility member function.
- bool isCallerPreservedOrConstPhysReg(unsigned PhysReg) const;
+ bool isConstantPhysReg(MCRegister PhysReg) const;
/// Get an iterator over the pressure sets affected by the given physical or
/// virtual register. If RegUnit is physical, it must be a register unit (from
/// MCRegUnitIterator).
- PSetIterator getPressureSets(unsigned RegUnit) const;
+ PSetIterator getPressureSets(Register RegUnit) const;
//===--------------------------------------------------------------------===//
// Virtual Register Info
@@ -628,10 +637,10 @@
/// Return the register class of the specified virtual register.
/// This shouldn't be used directly unless \p Reg has a register class.
/// \see getRegClassOrNull when this might happen.
- const TargetRegisterClass *getRegClass(unsigned Reg) const {
- assert(VRegInfo[Reg].first.is<const TargetRegisterClass *>() &&
+ const TargetRegisterClass *getRegClass(Register Reg) const {
+ assert(VRegInfo[Reg.id()].first.is<const TargetRegisterClass *>() &&
"Register class not set, wrong accessor");
- return VRegInfo[Reg].first.get<const TargetRegisterClass *>();
+ return VRegInfo[Reg.id()].first.get<const TargetRegisterClass *>();
}
/// Return the register class of \p Reg, or null if Reg has not been assigned
@@ -645,7 +654,7 @@
/// Neither of these conditions is possible without GlobalISel for now.
/// In other words, if GlobalISel is not used or if the query happens after
/// the select pass, using getRegClass is safe.
- const TargetRegisterClass *getRegClassOrNull(unsigned Reg) const {
+ const TargetRegisterClass *getRegClassOrNull(Register Reg) const {
const RegClassOrRegBank &Val = VRegInfo[Reg].first;
return Val.dyn_cast<const TargetRegisterClass *>();
}
@@ -654,7 +663,7 @@
/// a register bank or has been assigned a register class.
/// \note It is possible to get the register bank from the register class via
/// RegisterBankInfo::getRegBankFromRegClass.
- const RegisterBank *getRegBankOrNull(unsigned Reg) const {
+ const RegisterBank *getRegBankOrNull(Register Reg) const {
const RegClassOrRegBank &Val = VRegInfo[Reg].first;
return Val.dyn_cast<const RegisterBank *>();
}
@@ -662,17 +671,17 @@
/// Return the register bank or register class of \p Reg.
/// \note Before the register bank gets assigned (i.e., before the
/// RegBankSelect pass) \p Reg may not have either.
- const RegClassOrRegBank &getRegClassOrRegBank(unsigned Reg) const {
+ const RegClassOrRegBank &getRegClassOrRegBank(Register Reg) const {
return VRegInfo[Reg].first;
}
/// setRegClass - Set the register class of the specified virtual register.
- void setRegClass(unsigned Reg, const TargetRegisterClass *RC);
+ void setRegClass(Register Reg, const TargetRegisterClass *RC);
/// Set the register bank to \p RegBank for \p Reg.
- void setRegBank(unsigned Reg, const RegisterBank &RegBank);
+ void setRegBank(Register Reg, const RegisterBank &RegBank);
- void setRegClassOrRegBank(unsigned Reg,
+ void setRegClassOrRegBank(Register Reg,
const RegClassOrRegBank &RCOrRB){
VRegInfo[Reg].first = RCOrRB;
}
@@ -688,7 +697,7 @@
/// Use RegisterBankInfo::constrainGenericRegister in GlobalISel's
/// InstructionSelect pass and constrainRegAttrs in every other pass,
/// including non-select passes of GlobalISel, instead.
- const TargetRegisterClass *constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *constrainRegClass(Register Reg,
const TargetRegisterClass *RC,
unsigned MinNumRegs = 0);
@@ -703,7 +712,7 @@
/// \note Use this method instead of constrainRegClass and
/// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG
/// ISel / FastISel and GlobalISel's InstructionSelect pass respectively.
- bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg,
+ bool constrainRegAttrs(Register Reg, Register ConstrainingReg,
unsigned MinNumRegs = 0);
/// recomputeRegClass - Try to find a legal super-class of Reg's register
@@ -713,7 +722,7 @@
/// This method can be used after constraints have been removed from a
/// virtual register, for example after removing instructions or splitting
/// the live range.
- bool recomputeRegClass(unsigned Reg);
+ bool recomputeRegClass(Register Reg);
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
@@ -726,14 +735,14 @@
/// Get the low-level type of \p Reg or LLT{} if Reg is not a generic
/// (target independent) virtual register.
- LLT getType(unsigned Reg) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg) && VRegToType.inBounds(Reg))
+ LLT getType(Register Reg) const {
+ if (Register::isVirtualRegister(Reg) && VRegToType.inBounds(Reg))
return VRegToType[Reg];
return LLT{};
}
/// Set the low-level type of \p VReg to \p Ty.
- void setType(unsigned VReg, LLT Ty);
+ void setType(Register VReg, LLT Ty);
/// Create and return a new generic virtual register with low-level
/// type \p Ty.
@@ -748,7 +757,7 @@
/// temporarily while constructing machine instructions. Most operations are
/// undefined on an incomplete register until one of setRegClass(),
/// setRegBank() or setSize() has been called on it.
- unsigned createIncompleteVirtualRegister(StringRef Name = "");
+ Register createIncompleteVirtualRegister(StringRef Name = "");
/// getNumVirtRegs - Return the number of virtual registers created.
unsigned getNumVirtRegs() const { return VRegInfo.size(); }
@@ -759,8 +768,8 @@
/// setRegAllocationHint - Specify a register allocation hint for the
/// specified virtual register. This is typically used by target, and in case
/// of an earlier hint it will be overwritten.
- void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg) {
+ assert(VReg.isVirtual());
RegAllocHints[VReg].first = Type;
RegAllocHints[VReg].second.clear();
RegAllocHints[VReg].second.push_back(PrefReg);
@@ -768,19 +777,19 @@
/// addRegAllocationHint - Add a register allocation hint to the hints
/// vector for VReg.
- void addRegAllocationHint(unsigned VReg, unsigned PrefReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ void addRegAllocationHint(Register VReg, Register PrefReg) {
+ assert(Register::isVirtualRegister(VReg));
RegAllocHints[VReg].second.push_back(PrefReg);
}
/// Specify the preferred (target independent) register allocation hint for
/// the specified virtual register.
- void setSimpleHint(unsigned VReg, unsigned PrefReg) {
+ void setSimpleHint(Register VReg, Register PrefReg) {
setRegAllocationHint(VReg, /*Type=*/0, PrefReg);
}
- void clearSimpleHint(unsigned VReg) {
- assert (RegAllocHints[VReg].first == 0 &&
+ void clearSimpleHint(Register VReg) {
+ assert (!RegAllocHints[VReg].first &&
"Expected to clear a non-target hint!");
RegAllocHints[VReg].second.clear();
}
@@ -788,34 +797,46 @@
/// getRegAllocationHint - Return the register allocation hint for the
/// specified virtual register. If there are many hints, this returns the
/// one with the greatest weight.
- std::pair<unsigned, unsigned>
- getRegAllocationHint(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
- unsigned BestHint = (RegAllocHints[VReg].second.size() ?
- RegAllocHints[VReg].second[0] : 0);
- return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint);
+ std::pair<Register, Register>
+ getRegAllocationHint(Register VReg) const {
+ assert(VReg.isVirtual());
+ Register BestHint = (RegAllocHints[VReg.id()].second.size() ?
+ RegAllocHints[VReg.id()].second[0] : Register());
+ return std::pair<Register, Register>(RegAllocHints[VReg.id()].first,
+ BestHint);
}
/// getSimpleHint - same as getRegAllocationHint except it will only return
/// a target independent hint.
- unsigned getSimpleHint(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
- std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg);
- return Hint.first ? 0 : Hint.second;
+ Register getSimpleHint(Register VReg) const {
+ assert(VReg.isVirtual());
+ std::pair<Register, Register> Hint = getRegAllocationHint(VReg);
+ return Hint.first ? Register() : Hint.second;
}
/// getRegAllocationHints - Return a reference to the vector of all
/// register allocation hints for VReg.
- const std::pair<unsigned, SmallVector<unsigned, 4>>
- &getRegAllocationHints(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ const std::pair<Register, SmallVector<Register, 4>>
+ &getRegAllocationHints(Register VReg) const {
+ assert(VReg.isVirtual());
return RegAllocHints[VReg];
}
/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
- void markUsesInDebugValueAsUndef(unsigned Reg) const;
+ void markUsesInDebugValueAsUndef(Register Reg) const;
+
+ /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions
+ /// to refer to the designated register.
+ void updateDbgUsersToReg(Register Reg,
+ ArrayRef<MachineInstr*> Users) const {
+ for (MachineInstr *MI : Users) {
+ assert(MI->isDebugInstr());
+ assert(MI->getOperand(0).isReg());
+ MI->getOperand(0).setReg(Reg);
+ }
+ }
/// Return true if the specified register is modified in this function.
/// This checks that no defining machine operands exist for the register or
@@ -823,13 +844,13 @@
/// ignored; to consider them, pass 'true' for the optional parameter
/// SkipNoReturnDef. The register is also considered modified when it is set
/// in the UsedPhysRegMask.
- bool isPhysRegModified(unsigned PhysReg, bool SkipNoReturnDef = false) const;
+ bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef = false) const;
/// Return true if the specified register is modified or read in this
/// function. This checks that no machine operands exist for the register or
/// any of its aliases. The register is also considered used when it is set
/// in the UsedPhysRegMask.
- bool isPhysRegUsed(unsigned PhysReg) const;
+ bool isPhysRegUsed(MCRegister PhysReg) const;
/// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used.
/// This corresponds to the bit mask attached to register mask operands.
@@ -864,7 +885,7 @@
/// canReserveReg - Returns true if PhysReg can be used as a reserved
/// register. Any register can be reserved before freezeReservedRegs() is
/// called.
- bool canReserveReg(unsigned PhysReg) const {
+ bool canReserveReg(MCRegister PhysReg) const {
return !reservedRegsFrozen() || ReservedRegs.test(PhysReg);
}
@@ -882,8 +903,8 @@
///
/// Reserved registers may belong to an allocatable register class, but the
/// target has explicitly requested that they are not used.
- bool isReserved(unsigned PhysReg) const {
- return getReservedRegs().test(PhysReg);
+ bool isReserved(MCRegister PhysReg) const {
+ return getReservedRegs().test(PhysReg.id());
}
/// Returns true when the given register unit is considered reserved.
@@ -900,7 +921,7 @@
/// Allocatable registers may show up in the allocation order of some virtual
/// register, so a register allocator needs to track its liveness and
/// availability.
- bool isAllocatable(unsigned PhysReg) const {
+ bool isAllocatable(MCRegister PhysReg) const {
return getTargetRegisterInfo()->isInAllocatableClass(PhysReg) &&
!isReserved(PhysReg);
}
@@ -911,31 +932,31 @@
/// addLiveIn - Add the specified register as a live-in. Note that it
/// is an error to add the same register to the same set more than once.
- void addLiveIn(unsigned Reg, unsigned vreg = 0) {
+ void addLiveIn(MCRegister Reg, Register vreg = Register()) {
LiveIns.push_back(std::make_pair(Reg, vreg));
}
// Iteration support for the live-ins set. It's kept in sorted order
// by register number.
using livein_iterator =
- std::vector<std::pair<unsigned,unsigned>>::const_iterator;
+ std::vector<std::pair<MCRegister,Register>>::const_iterator;
livein_iterator livein_begin() const { return LiveIns.begin(); }
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
- ArrayRef<std::pair<unsigned, unsigned>> liveins() const {
+ ArrayRef<std::pair<MCRegister, Register>> liveins() const {
return LiveIns;
}
- bool isLiveIn(unsigned Reg) const;
+ bool isLiveIn(Register Reg) const;
/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
/// corresponding live-in physical register.
- unsigned getLiveInPhysReg(unsigned VReg) const;
+ MCRegister getLiveInPhysReg(Register VReg) const;
/// getLiveInVirtReg - If PReg is a live-in physical register, return the
/// corresponding live-in virtual register.
- unsigned getLiveInVirtReg(unsigned PReg) const;
+ Register getLiveInVirtReg(MCRegister PReg) const;
/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
/// into the given entry block.
@@ -945,7 +966,7 @@
/// Returns a mask covering all bits that can appear in lane masks of
/// subregisters of the virtual register @p Reg.
- LaneBitmask getMaxLaneMaskForVReg(unsigned Reg) const;
+ LaneBitmask getMaxLaneMaskForVReg(Register Reg) const;
/// defusechain_iterator - This class provides iterator support for machine
/// operands in the function that use or define a specific register. If
@@ -1162,14 +1183,13 @@
public:
PSetIterator() = default;
- PSetIterator(unsigned RegUnit, const MachineRegisterInfo *MRI) {
+ PSetIterator(Register RegUnit, const MachineRegisterInfo *MRI) {
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
+ if (RegUnit.isVirtual()) {
const TargetRegisterClass *RC = MRI->getRegClass(RegUnit);
PSet = TRI->getRegClassPressureSets(RC);
Weight = TRI->getRegClassWeight(RC).RegWeight;
- }
- else {
+ } else {
PSet = TRI->getRegUnitPressureSets(RegUnit);
Weight = TRI->getRegUnitWeight(RegUnit);
}
@@ -1191,8 +1211,8 @@
}
};
-inline PSetIterator MachineRegisterInfo::
-getPressureSets(unsigned RegUnit) const {
+inline PSetIterator
+MachineRegisterInfo::getPressureSets(Register RegUnit) const {
return PSetIterator(RegUnit, this);
}
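Taken together, these hunks migrate MachineRegisterInfo from raw unsigned register numbers to the typed Register/MCRegister wrappers. A minimal sketch of what a caller looks like after the migration; the helper itself is illustrative and not part of this change:

#include <cassert>

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative helper: walk all virtual registers using the typed API.
static void visitVRegDefs(llvm::MachineFunction &MF) {
  llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    // Register::index2VirtReg replaces TargetRegisterInfo::index2VirtReg.
    llvm::Register Reg = llvm::Register::index2VirtReg(I);
    if (MRI.use_nodbg_empty(Reg))
      continue; // No non-debug uses; nothing to report.
    // Predicates such as isVirtual() are now members of Register itself.
    assert(Reg.isVirtual() && "index2VirtReg yields virtual registers");
    if (llvm::MachineInstr *Def = MRI.getVRegDef(Reg))
      Def->print(llvm::errs()); // Dump the SSA defining instruction.
  }
}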
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineSSAUpdater.h b/linux-x64/clang/include/llvm/CodeGen/MachineSSAUpdater.h
index 0319ec7..0af356e 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -13,6 +13,8 @@
#ifndef LLVM_CODEGEN_MACHINESSAUPDATER_H
#define LLVM_CODEGEN_MACHINESSAUPDATER_H
+#include "llvm/CodeGen/Register.h"
+
namespace llvm {
class MachineBasicBlock;
@@ -35,12 +37,9 @@
private:
/// AvailableVals - This keeps track of which value to use on a per-block
/// basis. When we insert PHI nodes, we keep track of them here.
- //typedef DenseMap<MachineBasicBlock*, unsigned > AvailableValsTy;
+ //typedef DenseMap<MachineBasicBlock*, Register> AvailableValsTy;
void *AV = nullptr;
- /// VR - Current virtual register whose uses are being updated.
- unsigned VR;
-
/// VRC - Register class of the current virtual register.
const TargetRegisterClass *VRC;
@@ -62,11 +61,12 @@
/// Initialize - Reset this object to get ready for a new set of SSA
/// updates.
- void Initialize(unsigned V);
+ void Initialize(Register V);
+ void Initialize(const TargetRegisterClass *RC);
/// AddAvailableValue - Indicate that a rewritten value is available at the
/// end of the specified block with the specified value.
- void AddAvailableValue(MachineBasicBlock *BB, unsigned V);
+ void AddAvailableValue(MachineBasicBlock *BB, Register V);
/// HasValueForBlock - Return true if the MachineSSAUpdater already has a
/// value for the specified block.
@@ -74,7 +74,7 @@
/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
/// live at the end of the specified block.
- unsigned GetValueAtEndOfBlock(MachineBasicBlock *BB);
+ Register GetValueAtEndOfBlock(MachineBasicBlock *BB);
/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
/// is live in the middle of the specified block.
@@ -94,7 +94,7 @@
/// their respective blocks. However, the use of X happens in the *middle* of
/// a block. Because of this, we need to insert a new PHI node in SomeBB to
/// merge the appropriate values, and this value isn't live out of the block.
- unsigned GetValueInMiddleOfBlock(MachineBasicBlock *BB);
+ Register GetValueInMiddleOfBlock(MachineBasicBlock *BB);
/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
/// which use their value in the corresponding predecessor. Note that this
@@ -104,7 +104,7 @@
void RewriteUse(MachineOperand &U);
private:
- unsigned GetValueAtEndOfBlockInternal(MachineBasicBlock *BB);
+ Register GetValueAtEndOfBlockInternal(MachineBasicBlock *BB);
};
} // end namespace llvm
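MachineSSAUpdater now traffics in Register as well, and the current register is tracked via its register class rather than a VR member. A minimal sketch of the usual update pattern against the new interface; OldReg and NewDefs are hypothetical caller state:

#include <utility>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"

// NewDefs pairs a block with the new virtual register defined there.
static void rewriteUsesOf(
    llvm::MachineFunction &MF, llvm::Register OldReg,
    llvm::ArrayRef<std::pair<llvm::MachineBasicBlock *, llvm::Register>>
        NewDefs) {
  llvm::MachineSSAUpdater Updater(MF);
  Updater.Initialize(OldReg); // Register overload introduced above.
  for (const auto &D : NewDefs)
    Updater.AddAvailableValue(D.first, D.second);
  // Collect uses up front; RewriteUse mutates the use lists.
  llvm::SmallVector<llvm::MachineOperand *, 8> Uses;
  for (llvm::MachineOperand &MO : MF.getRegInfo().use_operands(OldReg))
    Uses.push_back(&MO);
  for (llvm::MachineOperand *MO : Uses)
    Updater.RewriteUse(*MO); // Inserts PHIs where control flow requires.
}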
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineScheduler.h b/linux-x64/clang/include/llvm/CodeGen/MachineScheduler.h
index 75a334f..a7edaaa 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineScheduler.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineScheduler.h
@@ -80,7 +80,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
@@ -100,7 +99,9 @@
extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
+extern cl::opt<bool> VerifyScheduling;
+class AAResults;
class LiveIntervals;
class MachineDominatorTree;
class MachineFunction;
@@ -120,7 +121,7 @@
const MachineLoopInfo *MLI = nullptr;
const MachineDominatorTree *MDT = nullptr;
const TargetPassConfig *PassConfig = nullptr;
- AliasAnalysis *AA = nullptr;
+ AAResults *AA = nullptr;
LiveIntervals *LIS = nullptr;
RegisterClassInfo *RegClassInfo;
@@ -185,6 +186,9 @@
// first.
bool DisableLatencyHeuristic = false;
+ // Compute DFSResult for use in scheduling heuristics.
+ bool ComputeDFSResult = false;
+
MachineSchedPolicy() = default;
};
@@ -260,7 +264,7 @@
/// PreRA and PostRA MachineScheduler.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
- AliasAnalysis *AA;
+ AAResults *AA;
LiveIntervals *LIS;
std::unique_ptr<MachineSchedStrategy> SchedImpl;
@@ -756,7 +760,16 @@
unsigned getOtherResourceCount(unsigned &OtherCritIdx);
- void releaseNode(SUnit *SU, unsigned ReadyCycle);
+ /// Release SU to make it ready. If no hazard blocks it, remove it from the
+ /// pending queue (if it is already there) and push it onto the available
+ /// queue. Otherwise, push the SU onto the pending queue.
+ ///
+ /// @param SU The unit to be released.
+ /// @param ReadyCycle The cycle at which the unit becomes ready.
+ /// @param InPQueue Whether SU is already in the pending queue.
+ /// @param Idx SU's index in the pending queue, if it is there.
+ void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
+ unsigned Idx = 0);
void bumpCycle(unsigned NextCycle);
@@ -954,7 +967,7 @@
if (SU->isScheduled)
return;
- Top.releaseNode(SU, SU->TopReadyCycle);
+ Top.releaseNode(SU, SU->TopReadyCycle, false);
TopCand.SU = nullptr;
}
@@ -962,7 +975,7 @@
if (SU->isScheduled)
return;
- Bot.releaseNode(SU, SU->BotReadyCycle);
+ Bot.releaseNode(SU, SU->BotReadyCycle, false);
BotCand.SU = nullptr;
}
@@ -1008,7 +1021,7 @@
/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
protected:
- ScheduleDAGMI *DAG;
+ ScheduleDAGMI *DAG = nullptr;
SchedBoundary Top;
SmallVector<SUnit*, 8> BotRoots;
@@ -1042,7 +1055,7 @@
void releaseTopNode(SUnit *SU) override {
if (SU->isScheduled)
return;
- Top.releaseNode(SU, SU->TopReadyCycle);
+ Top.releaseNode(SU, SU->TopReadyCycle, false);
}
// Only called for roots.
@@ -1051,7 +1064,7 @@
}
protected:
- void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
+ virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
void pickNodeFromQueue(SchedCandidate &Cand);
};
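Since tryCandidate is now virtual on PostGenericScheduler, a target can bias the post-RA picking logic without duplicating the whole strategy. A sketch; the subclass and its tie-break are purely hypothetical:

#include "llvm/CodeGen/MachineScheduler.h"

namespace {
// Hypothetical target strategy layered on the generic post-RA scheduler.
class MyPostRAStrategy : public llvm::PostGenericScheduler {
public:
  using PostGenericScheduler::PostGenericScheduler; // Inherit the ctor.

protected:
  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override {
    // Start from the generic heuristics...
    PostGenericScheduler::tryCandidate(Cand, TryCand);
    // ...then a real target would apply its own tie-breaking here, e.g.
    // preferring candidates that keep a critical functional unit busy.
  }
};
} // namespace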
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineSizeOpts.h b/linux-x64/clang/include/llvm/CodeGen/MachineSizeOpts.h
new file mode 100644
index 0000000..07bbbad
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineSizeOpts.h
@@ -0,0 +1,46 @@
+//===- MachineSizeOpts.h - machine size optimization ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared machine IR code size optimization related
+// code.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_MACHINE_SIZEOPTS_H
+#define LLVM_CODEGEN_MACHINE_SIZEOPTS_H
+
+#include "llvm/Transforms/Utils/SizeOpts.h"
+
+namespace llvm {
+
+class ProfileSummaryInfo;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MBFIWrapper;
+
+/// Returns true if machine function \p MF is suggested to be size-optimized
+/// based on the profile.
+bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
+/// Returns true if machine basic block \p MBB is suggested to be size-optimized
+/// based on the profile.
+bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
+/// Returns true if machine basic block \p MBB is suggested to be size-optimized
+/// based on the profile.
+bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ MBFIWrapper *MBFIWrapper,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINE_SIZEOPTS_H
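A sketch of how a codegen pass might consult the new query; the surrounding pass and analysis plumbing (PSI/MBFI) are assumed to exist in the caller:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineSizeOpts.h"

// Returns true when a pass should pick the smaller of two equivalent code
// sequences for this block, based on profile data or optsize attributes.
static bool preferSmallCode(const llvm::MachineBasicBlock &MBB,
                            llvm::ProfileSummaryInfo *PSI,
                            const llvm::MachineBlockFrequencyInfo *MBFI) {
  return llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
}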
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineStableHash.h b/linux-x64/clang/include/llvm/CodeGen/MachineStableHash.h
new file mode 100644
index 0000000..8423b2d
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineStableHash.h
@@ -0,0 +1,30 @@
+//===------------ MachineStableHash.h - MIR Stable Hashing Utilities ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Stable hashing for MachineInstr and MachineOperand. Useful for getting a
+// hash across runs, modules, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H
+#define LLVM_CODEGEN_MACHINESTABLEHASH_H
+
+#include "llvm/CodeGen/StableHashing.h"
+
+namespace llvm {
+class MachineInstr;
+class MachineOperand;
+
+stable_hash stableHashValue(const MachineOperand &MO);
+stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false,
+ bool HashConstantPoolIndices = false,
+ bool HashMemOperands = false);
+
+} // namespace llvm
+
+#endif
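A sketch of consuming these hashes, e.g. to bucket structurally identical instructions reproducibly across runs; the bucketing map is illustrative:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineStableHash.h"

// Group instructions by stable hash; unlike pointer-based hashing, the
// resulting buckets are the same from one run to the next.
static void bucketByHash(
    llvm::ArrayRef<llvm::MachineInstr *> Instrs,
    llvm::DenseMap<llvm::stable_hash,
                   llvm::SmallVector<llvm::MachineInstr *, 4>> &Buckets) {
  for (llvm::MachineInstr *MI : Instrs)
    Buckets[llvm::stableHashValue(*MI, /*HashVRegs=*/true)].push_back(MI);
}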
diff --git a/linux-x64/clang/include/llvm/CodeGen/MachineTraceMetrics.h b/linux-x64/clang/include/llvm/CodeGen/MachineTraceMetrics.h
index 0259895..46b5736 100644
--- a/linux-x64/clang/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/linux-x64/clang/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -140,13 +140,13 @@
/// successors.
struct LiveInReg {
/// The virtual register required, or a register unit.
- unsigned Reg;
+ Register Reg;
/// For virtual registers: Minimum height of the defining instruction.
/// For regunits: Height of the highest user in the trace.
unsigned Height;
- LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+ LiveInReg(Register Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
};
/// Per-basic block information that relates to a specific trace through the
diff --git a/linux-x64/clang/include/llvm/CodeGen/ModuloSchedule.h b/linux-x64/clang/include/llvm/CodeGen/ModuloSchedule.h
new file mode 100644
index 0000000..1aa2320
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/ModuloSchedule.h
@@ -0,0 +1,389 @@
+//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Software pipelining (SWP) is an instruction scheduling technique for loops
+// that overlaps loop iterations and exploits ILP via compiler transformations.
+//
+// There are multiple methods for analyzing a loop and creating a schedule.
+// An example algorithm is Swing Modulo Scheduling (implemented by the
+// MachinePipeliner). The details of how a schedule is arrived at are irrelevant
+// for the task of actually rewriting a loop to adhere to the schedule, which
+// is what this file does.
+//
+// A schedule is, for every instruction in a block, a Cycle and a Stage. Note
+// that we only support single-block loops, so "block" and "loop" can be used
+// interchangeably.
+//
+// The Cycle of an instruction defines a partial order of the instructions in
+// the remapped loop. Instructions within a cycle must not consume the output
+// of any instruction in the same cycle. Cycle information is assumed to have
+// been calculated such that the processor will execute instructions in
+// lock-step (for example in a VLIW ISA).
+//
+// The Stage of an instruction defines the mapping between logical loop
+// iterations and pipelined loop iterations. An example (unrolled) pipeline
+// may look something like:
+//
+// I0[0] Execute instruction I0 of iteration 0
+// I1[0], I0[1] Execute I1 of iteration 0 and I0 of iteration 1
+// I1[1], I0[2]
+// I1[2], I0[3]
+//
+// In the schedule for this unrolled sequence we would say that I0 was scheduled
+// in stage 0 and I1 in stage 1:
+//
+// loop:
+// [stage 0] x = I0
+// [stage 1] I1 x (from stage 0)
+//
+// And to actually generate valid code we must insert a phi:
+//
+// loop:
+// x' = phi(x)
+// x = I0
+// I1 x'
+//
+// This is a simple example; the rules for how to generate correct code given
+// an arbitrary schedule containing loop-carried values are complex.
+//
+// Note that these examples only mention the steady-state kernel of the
+// generated loop; prologs and epilogs that prime and flush the pipeline
+// must also be generated. Doing so is nontrivial.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
+#define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include <deque>
+#include <vector>
+
+namespace llvm {
+class MachineBasicBlock;
+class MachineInstr;
+class LiveIntervals;
+
+/// Represents a schedule for a single-block loop. For every instruction we
+/// maintain a Cycle and Stage.
+class ModuloSchedule {
+private:
+ /// The block containing the loop instructions.
+ MachineLoop *Loop;
+
+ /// The instructions to be generated, in total order. Cycle provides a partial
+ /// order; the total order within cycles has been decided by the schedule
+ /// producer.
+ std::vector<MachineInstr *> ScheduledInstrs;
+
+ /// The cycle for each instruction.
+ DenseMap<MachineInstr *, int> Cycle;
+
+ /// The stage for each instruction.
+ DenseMap<MachineInstr *, int> Stage;
+
+ /// The number of stages in this schedule (Max(Stage) + 1).
+ int NumStages;
+
+public:
+ /// Create a new ModuloSchedule.
+ /// \arg ScheduledInstrs The new loop instructions, in total resequenced
+ /// order.
+ /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
+ /// not need to start at zero. ScheduledInstrs must be partially ordered by
+ /// Cycle.
+ /// \arg Stage Stage index for all instructions in ScheduledInstrs.
+ ModuloSchedule(MachineFunction &MF, MachineLoop *Loop,
+ std::vector<MachineInstr *> ScheduledInstrs,
+ DenseMap<MachineInstr *, int> Cycle,
+ DenseMap<MachineInstr *, int> Stage)
+ : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),
+ Stage(std::move(Stage)) {
+ NumStages = 0;
+ for (auto &KV : this->Stage)
+ NumStages = std::max(NumStages, KV.second);
+ ++NumStages;
+ }
+
+ /// Return the single-block loop being scheduled.
+ MachineLoop *getLoop() const { return Loop; }
+
+ /// Return the number of stages contained in this schedule, which is the
+ /// largest stage index + 1.
+ int getNumStages() const { return NumStages; }
+
+ /// Return the first cycle in the schedule, which is the cycle index of the
+ /// first instruction.
+ int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }
+
+ /// Return the final cycle in the schedule, which is the cycle index of the
+ /// last instruction.
+ int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }
+
+ /// Return the stage that MI is scheduled in, or -1.
+ int getStage(MachineInstr *MI) {
+ auto I = Stage.find(MI);
+ return I == Stage.end() ? -1 : I->second;
+ }
+
+ /// Return the cycle that MI is scheduled at, or -1.
+ int getCycle(MachineInstr *MI) {
+ auto I = Cycle.find(MI);
+ return I == Cycle.end() ? -1 : I->second;
+ }
+
+ /// Set the stage of a newly created instruction.
+ void setStage(MachineInstr *MI, int MIStage) {
+ assert(Stage.count(MI) == 0);
+ Stage[MI] = MIStage;
+ }
+
+ /// Return the rescheduled instructions in order.
+ ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
+
+ void dump() { print(dbgs()); }
+ void print(raw_ostream &OS);
+};
+
+/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
+/// rewriting the old loop and inserting prologs and epilogs as required.
+class ModuloScheduleExpander {
+public:
+ using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>;
+
+private:
+ using ValueMapTy = DenseMap<unsigned, unsigned>;
+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
+
+ ModuloSchedule &Schedule;
+ MachineFunction &MF;
+ const TargetSubtargetInfo &ST;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals &LIS;
+
+ MachineBasicBlock *BB;
+ MachineBasicBlock *Preheader;
+ MachineBasicBlock *NewKernel = nullptr;
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
+
+ /// Map from each register to the maximum difference in stages between its
+ /// definition and its uses. The first element of the pair is that stage
+ /// difference; the second is true if the register defines a Phi value and
+ /// the loop value is scheduled before the Phi.
+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
+
+ /// Instructions to change when emitting the final schedule.
+ InstrChangesTy InstrChanges;
+
+ void generatePipelinedLoop();
+ void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
+ void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs);
+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast);
+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast);
+ void removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs);
+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);
+ void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ ValueMapTy *VRMap);
+ bool computeDelta(MachineInstr &MI, unsigned &Delta);
+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
+ unsigned Num);
+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ void updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum, unsigned InstrStageNum,
+ ValueMapTy *VRMap);
+ MachineInstr *findDefInLoop(unsigned Reg);
+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
+ unsigned LoopStage, ValueMapTy *VRMap,
+ MachineBasicBlock *BB);
+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap);
+ void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,
+ unsigned CurStageNum, unsigned PhiNum,
+ MachineInstr *Phi, unsigned OldReg,
+ unsigned NewReg, unsigned PrevReg = 0);
+ bool isLoopCarried(MachineInstr &Phi);
+
+ /// Return the maximum number of stages/iterations that can occur between a
+ /// register definition and its uses.
+ unsigned getStagesForReg(int Reg, unsigned CurStage) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&
+ Stages.second)
+ return 1;
+ return Stages.first;
+ }
+
+ /// The number of stages for a Phi is a little different from that of other
+ /// instructions. The minimum value computed in RegToStageDiff is 1
+ /// because we assume the Phi is needed for at least 1 iteration.
+ /// This is not the case if the loop value is scheduled prior to the
+ /// Phi in the same stage. This function returns the number of stages
+ /// or iterations needed between the Phi definition and any uses.
+ unsigned getStagesForPhi(int Reg) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (Stages.second)
+ return Stages.first;
+ return Stages.first - 1;
+ }
+
+public:
+ /// Create a new ModuloScheduleExpander.
+ /// \arg InstrChanges Modifications to make to instructions with memory
+ /// operands.
+ /// FIXME: InstrChanges is opaque and is an implementation detail of an
+ /// optimization in MachinePipeliner that crosses abstraction boundaries.
+ ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
+ LiveIntervals &LIS, InstrChangesTy InstrChanges)
+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
+ TII(ST.getInstrInfo()), LIS(LIS),
+ InstrChanges(std::move(InstrChanges)) {}
+
+ /// Performs the actual expansion.
+ void expand();
+ /// Performs final cleanup after expansion.
+ void cleanup();
+
+ /// Returns the newly rewritten kernel block, or nullptr if this was
+ /// optimized away.
+ MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
+};
+
+/// A reimplementation of ModuloScheduleExpander. It works by generating a
+/// standalone kernel loop and peeling out the prologs and epilogs.
+class PeelingModuloScheduleExpander {
+public:
+ PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
+ LiveIntervals *LIS)
+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
+ TII(ST.getInstrInfo()), LIS(LIS) {}
+
+ void expand();
+
+ /// Runs ModuloScheduleExpander and treats it as a golden input to validate
+ /// aspects of the code generated by PeelingModuloScheduleExpander.
+ void validateAgainstModuloScheduleExpander();
+
+protected:
+ ModuloSchedule &Schedule;
+ MachineFunction &MF;
+ const TargetSubtargetInfo &ST;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
+
+ /// The original loop block that gets rewritten in-place.
+ MachineBasicBlock *BB;
+ /// The original loop preheader.
+ MachineBasicBlock *Preheader;
+ /// All prolog and epilog blocks.
+ SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
+ /// For every block, the stages that are produced.
+ DenseMap<MachineBasicBlock *, BitVector> LiveStages;
+ /// For every block, the stages that are available. A stage can be available
+ /// but not produced (in the epilog) or produced but not available (in the
+ /// prolog).
+ DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
+ /// When peeling the epilogue, keep track of the distance between the phi
+ /// nodes and the kernel.
+ DenseMap<MachineInstr *, unsigned> PhiNodeLoopIteration;
+
+ /// CanonicalMIs maps each cloned instruction to its canonical counterpart;
+ /// BlockMIs maps a (block, canonical instruction) pair to that block's
+ /// clone. Together they relate instructions across all kernel clones.
+ DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs;
+ DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *>
+ BlockMIs;
+
+ /// State passed from peelKernel to peelPrologAndEpilogs().
+ std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
+ /// Illegal phis that need to be deleted once we re-link stages.
+ SmallVector<MachineInstr *, 4> IllegalPhisToDelete;
+
+ /// Converts BB from the original loop body to the rewritten, pipelined
+ /// steady-state.
+ void rewriteKernel();
+
+ /// Peels one iteration of the rewritten kernel (BB) in the specified
+ /// direction.
+ MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
+ // Delete instructions whose stage is less than MinStage in the given basic
+ // block.
+ void filterInstructions(MachineBasicBlock *MB, int MinStage);
+ // Move instructions of the given stage from SourceBB to DestBB. Remap the
+ // phi instructions to keep the IR valid.
+ void moveStageBetweenBlocks(MachineBasicBlock *DestBB,
+ MachineBasicBlock *SourceBB, unsigned Stage);
+ /// Peel the kernel forwards and backwards to produce prologs and epilogs,
+ /// and stitch them together.
+ void peelPrologAndEpilogs();
+ /// All prolog and epilog blocks are clones of the kernel, so any produced
+ /// register in one block has an equivalent in all other blocks.
+ Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
+ /// Change all users of MI, if MI is predicated out
+ /// (LiveStages[MI->getParent()] == false).
+ void rewriteUsesOf(MachineInstr *MI);
+ /// Insert branches between prologs, kernel and epilogs.
+ void fixupBranches();
+ /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
+ /// to a block dominated by all prologs and epilogs. This allows us to treat
+ /// the loop exiting block as any other kernel clone.
+ MachineBasicBlock *CreateLCSSAExitingBlock();
+ /// Helper to get the stage of an instruction in the schedule.
+ unsigned getStage(MachineInstr *MI) {
+ if (CanonicalMIs.count(MI))
+ MI = CanonicalMIs[MI];
+ return Schedule.getStage(MI);
+ }
+ /// Helper function to find the right canonical register for a phi instruction
+ /// coming from a peeled-out prologue.
+ Register getPhiCanonicalReg(MachineInstr* CanonicalPhi, MachineInstr* Phi);
+ /// Target loop info before kernel peeling.
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
+};
+
+/// Expander that simply annotates each scheduled instruction with a post-instr
+/// symbol that can be consumed by the ModuloScheduleTest pass.
+///
+/// The post-instr symbol is a way of annotating an instruction that can be
+/// roundtripped in MIR. The syntax is:
+/// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
+class ModuloScheduleTestAnnotater {
+ MachineFunction &MF;
+ ModuloSchedule &S;
+
+public:
+ ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
+ : MF(MF), S(S) {}
+
+ /// Performs the annotation.
+ void annotate();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
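To make the hand-off described in the header concrete: a scheduler that has computed per-instruction cycles and stages would package and expand them roughly like this. The inputs (Instrs, Cycle, Stage) are assumed to have been produced by the caller, e.g. a swing-modulo-style scheduler:

#include <utility>
#include <vector>

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/ModuloSchedule.h"

// Package a computed schedule and rewrite the loop in place, inserting
// the prologs and epilogs that prime and flush the pipeline.
static void expandPipelinedLoop(
    llvm::MachineFunction &MF, llvm::MachineLoop *L, llvm::LiveIntervals &LIS,
    std::vector<llvm::MachineInstr *> Instrs,
    llvm::DenseMap<llvm::MachineInstr *, int> Cycle,
    llvm::DenseMap<llvm::MachineInstr *, int> Stage) {
  llvm::ModuloSchedule MS(MF, L, std::move(Instrs), std::move(Cycle),
                          std::move(Stage));
  // No memory-operand fixups in this sketch; pass an empty change map.
  llvm::ModuloScheduleExpander::InstrChangesTy NoChanges;
  llvm::ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NoChanges));
  MSE.expand();
  MSE.cleanup();
}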
diff --git a/linux-x64/clang/include/llvm/CodeGen/MultiHazardRecognizer.h b/linux-x64/clang/include/llvm/CodeGen/MultiHazardRecognizer.h
new file mode 100644
index 0000000..9846045
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/MultiHazardRecognizer.h
@@ -0,0 +1,47 @@
+//=- llvm/CodeGen/MultiHazardRecognizer.h - Scheduling Support ----*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MultiHazardRecognizer class, which is a wrapper
+// for a set of ScheduleHazardRecognizer instances.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class MachineInstr;
+class SUnit;
+
+class MultiHazardRecognizer : public ScheduleHazardRecognizer {
+ SmallVector<std::unique_ptr<ScheduleHazardRecognizer>, 4> Recognizers;
+
+public:
+ MultiHazardRecognizer() = default;
+ void AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer> &&);
+
+ bool atIssueLimit() const override;
+ HazardType getHazardType(SUnit *, int Stalls = 0) override;
+ void Reset() override;
+ void EmitInstruction(SUnit *) override;
+ void EmitInstruction(MachineInstr *) override;
+ unsigned PreEmitNoops(SUnit *) override;
+ unsigned PreEmitNoops(MachineInstr *) override;
+ bool ShouldPreferAnother(SUnit *) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+ void EmitNoop() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H
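A sketch of composing recognizers; the two sub-recognizers stand in for whatever a target actually constructs:

#include <memory>
#include <utility>

#include "llvm/CodeGen/MultiHazardRecognizer.h"

// Combine two hazard recognizers into one. The wrapper forwards each
// scheduling query to every registered sub-recognizer.
static std::unique_ptr<llvm::ScheduleHazardRecognizer>
combineRecognizers(std::unique_ptr<llvm::ScheduleHazardRecognizer> A,
                   std::unique_ptr<llvm::ScheduleHazardRecognizer> B) {
  auto MHR = std::make_unique<llvm::MultiHazardRecognizer>();
  MHR->AddHazardRecognizer(std::move(A));
  MHR->AddHazardRecognizer(std::move(B));
  return MHR;
}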
diff --git a/linux-x64/clang/include/llvm/CodeGen/NonRelocatableStringpool.h b/linux-x64/clang/include/llvm/CodeGen/NonRelocatableStringpool.h
new file mode 100644
index 0000000..fe07c70
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/NonRelocatableStringpool.h
@@ -0,0 +1,83 @@
+//===- NonRelocatableStringpool.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_NONRELOCATABLESTRINGPOOL_H
+#define LLVM_CODEGEN_NONRELOCATABLESTRINGPOOL_H
+
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/Support/Allocator.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+
+/// A string table that doesn't need relocations.
+///
+/// Use this class when a string table doesn't need relocations. It provides
+/// this ability by simply associating offsets with strings.
+class NonRelocatableStringpool {
+public:
+ /// Entries are stored into the StringMap and simply linked together through
+ /// the second element of this pair in order to keep track of insertion
+ /// order.
+ using MapTy = StringMap<DwarfStringPoolEntry, BumpPtrAllocator>;
+
+ NonRelocatableStringpool(
+ std::function<StringRef(StringRef Input)> Translator = nullptr,
+ bool PutEmptyString = false)
+ : Translator(Translator) {
+ if (PutEmptyString)
+ EmptyString = getEntry("");
+ }
+
+ DwarfStringPoolEntryRef getEntry(StringRef S);
+
+ /// Get the offset of string \p S in the string table. This can insert a new
+ /// element or return the offset of a pre-existing one.
+ uint64_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); }
+
+ /// Get permanent storage for \p S (but do not necessarily emit \p S in the
+ /// output section). A later call to getStringOffset() with the same string
+ /// will, however, chain it into the emission list.
+ ///
+ /// \returns The StringRef that points to permanent storage to use
+ /// in place of \p S.
+ StringRef internString(StringRef S);
+
+ uint64_t getSize() { return CurrentEndOffset; }
+
+ /// Return the list of strings to be emitted. This does not contain the
+ /// strings which were added via internString only.
+ std::vector<DwarfStringPoolEntryRef> getEntriesForEmission() const;
+
+private:
+ MapTy Strings;
+ uint64_t CurrentEndOffset = 0;
+ unsigned NumEntries = 0;
+ DwarfStringPoolEntryRef EmptyString;
+ std::function<StringRef(StringRef Input)> Translator;
+};
+
+/// Helper for making strong types.
+template <typename T, typename S> class StrongType : public T {
+public:
+ template <typename... Args>
+ explicit StrongType(Args... A) : T(std::forward<Args>(A)...) {}
+};
+
+/// It's very easy to introduce bugs by passing the wrong string pool.
+/// By using strong types, the interface enforces that the right
+/// kind of pool is used.
+struct UniqueTag {};
+struct OffsetsTag {};
+using UniquingStringPool = StrongType<NonRelocatableStringpool, UniqueTag>;
+using OffsetsStringPool = StrongType<NonRelocatableStringpool, OffsetsTag>;
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_NONRELOCATABLESTRINGPOOL_H
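A sketch of the intended usage; the pool contents are purely illustrative:

#include <cassert>

#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/Support/raw_ostream.h"

static void demoStringPool() {
  llvm::NonRelocatableStringpool Pool;
  uint64_t Off1 = Pool.getStringOffset("producer"); // Inserts the string.
  uint64_t Off2 = Pool.getStringOffset("producer"); // Deduplicated.
  assert(Off1 == Off2 && "same string, same offset");
  // internString keeps the bytes alive without marking them for emission.
  llvm::StringRef Kept = Pool.internString("comp_dir");
  (void)Kept;
  // Only strings that went through getEntry/getStringOffset are emitted,
  // in insertion order.
  for (llvm::DwarfStringPoolEntryRef E : Pool.getEntriesForEmission())
    llvm::outs() << E.getOffset() << ": " << E.getString() << "\n";
}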
diff --git a/linux-x64/clang/include/llvm/CodeGen/PBQP/Math.h b/linux-x64/clang/include/llvm/CodeGen/PBQP/Math.h
index 8b014cc..099ba78 100644
--- a/linux-x64/clang/include/llvm/CodeGen/PBQP/Math.h
+++ b/linux-x64/clang/include/llvm/CodeGen/PBQP/Math.h
@@ -28,17 +28,17 @@
public:
/// Construct a PBQP vector of the given size.
explicit Vector(unsigned Length)
- : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {}
+ : Length(Length), Data(std::make_unique<PBQPNum []>(Length)) {}
/// Construct a PBQP vector with initializer.
Vector(unsigned Length, PBQPNum InitVal)
- : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {
+ : Length(Length), Data(std::make_unique<PBQPNum []>(Length)) {
std::fill(Data.get(), Data.get() + Length, InitVal);
}
/// Copy construct a PBQP vector.
Vector(const Vector &V)
- : Length(V.Length), Data(llvm::make_unique<PBQPNum []>(Length)) {
+ : Length(V.Length), Data(std::make_unique<PBQPNum []>(Length)) {
std::copy(V.Data.get(), V.Data.get() + Length, Data.get());
}
@@ -125,21 +125,21 @@
public:
/// Construct a PBQP Matrix with the given dimensions.
Matrix(unsigned Rows, unsigned Cols) :
- Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ Rows(Rows), Cols(Cols), Data(std::make_unique<PBQPNum []>(Rows * Cols)) {
}
/// Construct a PBQP Matrix with the given dimensions and initial
/// value.
Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal)
: Rows(Rows), Cols(Cols),
- Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ Data(std::make_unique<PBQPNum []>(Rows * Cols)) {
std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal);
}
/// Copy construct a PBQP matrix.
Matrix(const Matrix &M)
: Rows(M.Rows), Cols(M.Cols),
- Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ Data(std::make_unique<PBQPNum []>(Rows * Cols)) {
std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get());
}
diff --git a/linux-x64/clang/include/llvm/CodeGen/ParallelCG.h b/linux-x64/clang/include/llvm/CodeGen/ParallelCG.h
index a44715d..5504baa 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ParallelCG.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ParallelCG.h
@@ -14,15 +14,14 @@
#define LLVM_CODEGEN_PARALLELCG_H
#include "llvm/Support/CodeGen.h"
-#include "llvm/Target/TargetMachine.h"
-
#include <functional>
+#include <memory>
namespace llvm {
template <typename T> class ArrayRef;
class Module;
-class TargetOptions;
+class TargetMachine;
class raw_pwrite_stream;
/// Split M into OSs.size() partitions, and generate code for each. Takes a
@@ -39,7 +38,7 @@
splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs,
ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
- TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile,
+ CodeGenFileType FileType = CGFT_ObjectFile,
bool PreserveLocals = false);
} // namespace llvm
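With the file-type enum moved off TargetMachine, a call site now reads like the following sketch; the stream and target-machine setup are assumed to exist in the caller:

#include <functional>
#include <memory>

#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"

static std::unique_ptr<llvm::Module>
codeGenPartitions(std::unique_ptr<llvm::Module> M,
                  llvm::ArrayRef<llvm::raw_pwrite_stream *> OSs,
                  const std::function<std::unique_ptr<llvm::TargetMachine>()>
                      &TMFactory) {
  // CGFT_ObjectFile is now the free enumerator from llvm/Support/CodeGen.h,
  // not TargetMachine::CGFT_ObjectFile.
  return llvm::splitCodeGen(std::move(M), OSs, /*BCOSs=*/{}, TMFactory,
                            llvm::CGFT_ObjectFile);
}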
diff --git a/linux-x64/clang/include/llvm/CodeGen/Passes.h b/linux-x64/clang/include/llvm/CodeGen/Passes.h
index d92ee93..676ed2c 100644
--- a/linux-x64/clang/include/llvm/CodeGen/Passes.h
+++ b/linux-x64/clang/include/llvm/CodeGen/Passes.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_PASSES_H
#define LLVM_CODEGEN_PASSES_H
+#include "llvm/Support/CodeGen.h"
#include <functional>
#include <string>
@@ -22,6 +23,7 @@
class FunctionPass;
class MachineFunction;
class MachineFunctionPass;
+class MemoryBuffer;
class ModulePass;
class Pass;
class TargetMachine;
@@ -42,6 +44,16 @@
/// the entry block.
FunctionPass *createUnreachableBlockEliminationPass();
+ /// createBasicBlockSectionsPass - This pass assigns sections to machine
+ /// basic blocks and is enabled with -fbasic-block-sections. Buf is a memory
+ /// buffer that contains the list of functions and basic block ids to
+ /// selectively enable basic block sections.
+ MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf);
+
+ /// createMachineFunctionSplitterPass - This pass splits machine functions
+ /// using profile information.
+ MachineFunctionPass *createMachineFunctionSplitterPass();
+
/// MachineFunctionPrinter pass - This pass prints out the machine function to
/// the given stream as a debugging tool.
MachineFunctionPass *
@@ -64,10 +76,6 @@
/// matching during instruction selection.
FunctionPass *createCodeGenPreparePass();
- /// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather
- /// and scatter intrinsics with scalar code when target doesn't support them.
- FunctionPass *createScalarizeMaskedMemIntrinPass();
-
/// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg
/// load-linked/store-conditional loops.
extern char &AtomicExpandID;
@@ -185,11 +193,11 @@
/// register allocation.
extern char &ExpandPostRAPseudosID;
- /// createPostRAHazardRecognizer - This pass runs the post-ra hazard
+ /// PostRAHazardRecognizer - This pass runs the post-ra hazard
/// recognizer.
extern char &PostRAHazardRecognizerID;
- /// createPostRAScheduler - This pass performs post register allocation
+ /// PostRAScheduler - This pass performs post register allocation
/// scheduling.
extern char &PostRASchedulerID;
@@ -226,6 +234,10 @@
/// inserting cmov instructions.
extern char &EarlyIfConverterID;
+ /// EarlyIfPredicator - This pass performs if-conversion on SSA form by
+ /// predicating the if/else blocks and inserting selects at the join point.
+ extern char &EarlyIfPredicatorID;
+
/// This pass performs instruction combining using trace metrics to estimate
/// critical-path and resource depth.
extern char &MachineCombinerID;
@@ -271,6 +283,11 @@
/// MachineCSE - This pass performs global CSE on machine instructions.
extern char &MachineCSEID;
+ /// MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs
+ /// according to the semantics of each instruction, and also hoists
+ /// code.
+ extern char &MIRCanonicalizerID;
+
/// ImplicitNullChecks - This pass folds null pointer checks into nearby
/// memory operations.
extern char &ImplicitNullChecksID;
@@ -324,7 +341,7 @@
/// createDwarfEHPass - This pass mulches exception handling code into a form
/// adapted to code generation. Required if using dwarf exception handling.
- FunctionPass *createDwarfEHPass();
+ FunctionPass *createDwarfEHPass(CodeGenOpt::Level OptLevel);
/// createWinEHPass - Prepares personality functions used by MSVC on Windows,
/// in addition to the Itanium LSDA based personalities.
@@ -333,7 +350,7 @@
/// createSjLjEHPreparePass - This pass adapts exception handling code to use
/// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow.
///
- FunctionPass *createSjLjEHPreparePass();
+ FunctionPass *createSjLjEHPreparePass(const TargetMachine *TM);
/// createWasmEHPass - This pass adapts exception handling code to use
/// WebAssembly's exception handling scheme.
@@ -370,10 +387,6 @@
/// createJumpInstrTables - This pass creates jump-instruction tables.
ModulePass *createJumpInstrTablesPass();
- /// createForwardControlFlowIntegrityPass - This pass adds control-flow
- /// integrity.
- ModulePass *createForwardControlFlowIntegrityPass();
-
/// InterleavedAccess Pass - This pass identifies and matches interleaved
/// memory accesses to target specific intrinsics.
///
@@ -447,9 +460,38 @@
/// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
FunctionPass *createCFIInstrInserter();
+ /// Creates CFGuard longjmp target identification pass.
+ /// \see CFGuardLongjmp.cpp
+ FunctionPass *createCFGuardLongjmpPass();
+
/// Create Hardware Loop pass. \see HardwareLoops.cpp
FunctionPass *createHardwareLoopsPass();
+ /// This pass inserts pseudo probe annotation for callsite profiling.
+ FunctionPass *createPseudoProbeInserter();
+
+ /// Create IR Type Promotion pass. \see TypePromotion.cpp
+ FunctionPass *createTypePromotionPass();
+
+ /// Creates MIR Debugify pass. \see MachineDebugify.cpp
+ ModulePass *createDebugifyMachineModulePass();
+
+ /// Creates MIR Strip Debug pass. \see MachineStripDebug.cpp
+ /// If OnlyDebugified is true then it will only strip debug info if it was
+ /// added by a Debugify pass. The module will be left unchanged if the debug
+ /// info was generated by another source such as clang.
+ ModulePass *createStripDebugMachineModulePass(bool OnlyDebugified);
+
+ /// Creates MIR Check Debug pass. \see MachineCheckDebugify.cpp
+ ModulePass *createCheckDebugMachineModulePass();
+
+ /// This pass fixes up statepoint machine instructions, replacing uses of
+ /// caller-saved registers with stack slots.
+ extern char &FixupStatepointCallerSavedID;
+
+ /// This pass transforms <256 x i32> loads/stores into AMX load/store
+ /// intrinsics, or splits the data into two <128 x i32> halves.
+ FunctionPass *createX86LowerAMXTypePass();
} // End llvm namespace
#endif
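A sketch of wiring two of the newly exposed passes into a legacy pass manager pipeline; which passes a target actually wants is configuration-dependent:

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"

static void addExtraCodeGenPasses(llvm::legacy::PassManagerBase &PM) {
  // IR-level type promotion (see TypePromotion.cpp).
  PM.add(llvm::createTypePromotionPass());
  // Identify longjmp targets for Windows Control Flow Guard
  // (see CFGuardLongjmp.cpp).
  PM.add(llvm::createCFGuardLongjmpPass());
}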
diff --git a/linux-x64/clang/include/llvm/CodeGen/PseudoSourceValue.h b/linux-x64/clang/include/llvm/CodeGen/PseudoSourceValue.h
index 4b3cc91..f148701 100644
--- a/linux-x64/clang/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/linux-x64/clang/include/llvm/CodeGen/PseudoSourceValue.h
@@ -14,19 +14,19 @@
#define LLVM_CODEGEN_PSEUDOSOURCEVALUE_H
#include "llvm/ADT/StringMap.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ValueMap.h"
#include <map>
namespace llvm {
+class GlobalValue;
class MachineFrameInfo;
class MachineMemOperand;
+class MIRFormatter;
+class PseudoSourceValue;
class raw_ostream;
class TargetInstrInfo;
-raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
-class PseudoSourceValue;
raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
/// Special value supplied for machine level alias analysis. It indicates that
@@ -52,6 +52,7 @@
const PseudoSourceValue* PSV);
friend class MachineMemOperand; // For printCustom().
+ friend class MIRFormatter; // For printCustom().
/// Implement printing for PseudoSourceValue. This is called from
/// Value::print or Value's operator<<.
diff --git a/linux-x64/clang/include/llvm/CodeGen/RDFGraph.h b/linux-x64/clang/include/llvm/CodeGen/RDFGraph.h
new file mode 100644
index 0000000..00d6ec9
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/RDFGraph.h
@@ -0,0 +1,964 @@
+//===- RDFGraph.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Target-independent, SSA-based data flow graph for register data flow (RDF)
+// for a non-SSA program representation (e.g. post-RA machine code).
+//
+//
+// *** Introduction
+//
+// The RDF graph is a collection of nodes, each of which denotes some element
+// of the program. There are two main types of such elements: code and
+// references. Conceptually, "code" is something that represents the structure
+// of the program, e.g. basic block or a statement, while "reference" is an
+// instance of accessing a register, e.g. a definition or a use. Nodes are
+// connected with each other based on the structure of the program (such as
+// blocks, instructions, etc.), and based on the data flow (e.g. reaching
+// definitions, reached uses, etc.). The single-reaching-definition principle
+// of SSA is generally observed, although, due to the non-SSA representation
+// of the program, there are some differences between the graph and a "pure"
+// SSA representation.
+//
+//
+// *** Implementation remarks
+//
+// Since the graph can contain a large number of nodes, memory consumption
+// was one of the major design considerations. As a result, there is a single
+// base class NodeBase which defines all members used by all possible derived
+// classes. The members are arranged in a union, and a derived class cannot
+// add any data members of its own. Each derived class only defines the
+// functional interface, i.e. member functions. NodeBase must be a POD,
+// which implies that all of its members must also be PODs.
+// Since nodes need to be connected with other nodes, pointers have been
+// replaced with 32-bit identifiers: each node has an id of type NodeId.
+// There are mapping functions in the graph that translate between actual
+// memory addresses and the corresponding identifiers.
+// A node id of 0 is equivalent to nullptr.
+//
+//
+// *** Structure of the graph
+//
+// A code node is always a collection of other nodes. For example, a code
+// node corresponding to a basic block will contain code nodes corresponding
+// to instructions. In turn, a code node corresponding to an instruction will
+// contain a list of reference nodes that correspond to the definitions and
+// uses of registers in that instruction. The members are arranged into a
+// circular list, which is yet another consequence of the effort to save
+// memory: for each member node it should be possible to obtain its owner,
+// and it should be possible to access all other members. There are other
+// ways to accomplish that, but the circular list seemed the most natural.
+//
+// +- CodeNode -+
+// | | <---------------------------------------------------+
+// +-+--------+-+ |
+// |FirstM |LastM |
+// | +-------------------------------------+ |
+// | | |
+// V V |
+// +----------+ Next +----------+ Next Next +----------+ Next |
+// | |----->| |-----> ... ----->| |----->-+
+// +- Member -+ +- Member -+ +- Member -+
+//
+// The order of members is such that related reference nodes (see below)
+// should be contiguous on the member list.
+//
+// A reference node is a node that encapsulates an access to a register,
+// in other words, data flowing into or out of a register. There are two
+// major kinds of reference nodes: defs and uses. A def node will contain
+// the id of the first reached use, and the id of the first reached def.
+// Each def and use will contain the id of the reaching def, and also the
+// id of the next reached def (for def nodes) or use (for use nodes).
+// The "next node sharing the same reaching def" is denoted as "sibling".
+// In summary:
+// - Def node contains: reaching def, sibling, first reached def, and first
+// reached use.
+// - Use node contains: reaching def and sibling.
+//
+// +-- DefNode --+
+// | R2 = ... | <---+--------------------+
+// ++---------+--+ | |
+// |Reached |Reached | |
+// |Def |Use | |
+// | | |Reaching |Reaching
+// | V |Def |Def
+// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib
+// | | ... = R2 |----->| ... = R2 |----> ... ----> 0
+// | +-------------+ +-------------+
+// V
+// +-- DefNode --+ Sib
+// | R2 = ... |----> ...
+// ++---------+--+
+// | |
+// | |
+// ... ...
+//
+// To get a full picture, the circular lists connecting blocks within a
+// function, instructions within a block, etc. should be superimposed with
+// the def-def, def-use links shown above.
+// To illustrate this, consider a small example in a pseudo-assembly:
+// foo:
+// add r2, r0, r1 ; r2 = r0+r1
+// addi r0, r2, 1 ; r0 = r2+1
+// ret r0 ; return value in r0
+//
+// The graph (in a format used by the debugging functions) would look like:
+//
+// DFG dump:[
+// f1: Function foo
+// b2: === %bb.0 === preds(0), succs(0):
+// p3: phi [d4<r0>(,d12,u9):]
+// p5: phi [d6<r1>(,,u10):]
+// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):]
+// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):]
+// s14: ret [u15<r0>(d12):]
+// ]
+//
+// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the
+// kind of the node (i.e. f - function, b - basic block, p - phi,
+// s - statement, d - def, u - use).
+// The format of a def node is:
+// dN<R>(rd,d,u):sib,
+// where
+// N - numeric node id,
+//   R - register being defined,
+// rd - reaching def,
+// d - reached def,
+// u - reached use,
+// sib - sibling.
+// The format of a use node is:
+// uN<R>[!](rd):sib,
+// where
+// N - numeric node id,
+// R - register being used,
+// rd - reaching def,
+// sib - sibling.
+// Possible annotations (usually preceding the node id):
+// + - preserving def,
+// ~ - clobbering def,
+// " - shadow ref (follows the node id),
+// ! - fixed register (appears after register name).
+//
+// The circular lists are not explicit in the dump.
+//
+//
+// *** Node attributes
+//
+// NodeBase has a member "Attrs", which is the primary way of determining
+// the node's characteristics. The fields in this member decide whether
+// the node is a code node or a reference node (i.e. node's "type"), then
+// within each type, the "kind" determines what specifically this node
+// represents. The remaining bits, "flags", contain additional information
+// that is even more detailed than the "kind".
+// CodeNode's kinds are:
+// - Phi: Phi node, members are reference nodes.
+// - Stmt: Statement, members are reference nodes.
+// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
+// - Func: The whole function. The members are basic block nodes.
+// RefNode's kinds are:
+// - Use.
+// - Def.
+//
+// Meaning of flags:
+// - Preserving: applies only to defs. A preserving def is one that can
+// preserve some of the original bits among those that are included in
+// the register associated with that def. For example, if R0 is a 32-bit
+// register, but a def can only change the lower 16 bits, then it will
+// be marked as preserving.
+// - Shadow: a reference that has duplicates holding additional reaching
+// defs (see more below).
+// - Clobbering: applied only to defs, indicates that the value generated
+// by this def is unspecified. A typical example would be volatile registers
+// after function calls.
+// - Fixed: the register in this def/use cannot be replaced with any other
+// register. A typical case would be a parameter register to a call, or
+// the register with the return value from a function.
+//   - Undef: the register in this reference is assumed to have
+// no pre-existing value, even if it appears to be reached by some def.
+// This is typically used to prevent keeping registers artificially live
+// in cases when they are defined via predicated instructions. For example:
+// r0 = add-if-true cond, r10, r11 (1)
+// r0 = add-if-false cond, r12, r13, implicit r0 (2)
+// ... = r0 (3)
+// Before (1), r0 is not intended to be live, and the use of r0 in (3) is
+// not meant to be reached by any def preceding (1). However, since the
+// defs in (1) and (2) are both preserving, these properties alone would
+// imply that the use in (3) may indeed be reached by some prior def.
+// Adding Undef flag to the def in (1) prevents that. The Undef flag
+// may be applied to both defs and uses.
+// - Dead: applies only to defs. The value coming out of a "dead" def is
+// assumed to be unused, even if the def appears to be reaching other defs
+// or uses. The motivation for this flag comes from dead defs on function
+// calls: there is no way to determine if such a def is dead without
+// analyzing the target's ABI. Hence the graph should contain this info,
+// as it is unavailable otherwise. On the other hand, a def without any
+// uses on a typical instruction is not the intended target for this flag.
+//
+// *** Shadow references
+//
+// It may happen that a super-register can have two (or more) non-overlapping
+// sub-registers. When both of these sub-registers are defined and followed
+// by a use of the super-register, the use of the super-register will not
+// have a unique reaching def: both defs of the sub-registers need to be
+// accounted for. In such cases, a duplicate use of the super-register is
+// added and it points to the extra reaching def. Both uses are marked with
+// a flag "shadow". Example:
+// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap:
+// set r0, 1 ; r0 = 1
+// set r1, 1 ; r1 = 1
+// addi t1, t0, 1 ; t1 = t0+1
+//
+// The DFG:
+// s1: set [d2<r0>(,,u9):]
+// s3: set [d4<r1>(,,u10):]
+// s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
+//
+// The statement s5 has two use nodes for t0: u7" and u8". The quotation
+// mark " indicates that the node is a shadow.
+//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+
+#include "RDFRegisters.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+// RDF uses uint32_t to refer to registers. This is to ensure that the type
+// size remains specific. In other places, registers are often stored using
+// unsigned.
+static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineDominanceFrontier;
+class MachineDominatorTree;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class raw_ostream;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+namespace rdf {
+
+ using NodeId = uint32_t;
+
+ struct DataFlowGraph;
+
+ struct NodeAttrs {
+ enum : uint16_t {
+ None = 0x0000, // Nothing
+
+ // Types: 2 bits
+ TypeMask = 0x0003,
+ Code = 0x0001, // 01, Container
+ Ref = 0x0002, // 10, Reference
+
+ // Kind: 3 bits
+ KindMask = 0x0007 << 2,
+ Def = 0x0001 << 2, // 001
+ Use = 0x0002 << 2, // 010
+ Phi = 0x0003 << 2, // 011
+ Stmt = 0x0004 << 2, // 100
+ Block = 0x0005 << 2, // 101
+ Func = 0x0006 << 2, // 110
+
+ // Flags: 7 bits for now
+ FlagMask = 0x007F << 5,
+ Shadow = 0x0001 << 5, // 0000001, Has extra reaching defs.
+ Clobbering = 0x0002 << 5, // 0000010, Produces unspecified values.
+ PhiRef = 0x0004 << 5, // 0000100, Member of PhiNode.
+ Preserving = 0x0008 << 5, // 0001000, Def can keep original bits.
+ Fixed = 0x0010 << 5, // 0010000, Fixed register.
+ Undef = 0x0020 << 5, // 0100000, Has no pre-existing value.
+ Dead = 0x0040 << 5, // 1000000, Does not define a value.
+ };
+
+ static uint16_t type(uint16_t T) { return T & TypeMask; }
+ static uint16_t kind(uint16_t T) { return T & KindMask; }
+ static uint16_t flags(uint16_t T) { return T & FlagMask; }
+
+ static uint16_t set_type(uint16_t A, uint16_t T) {
+ return (A & ~TypeMask) | T;
+ }
+
+ static uint16_t set_kind(uint16_t A, uint16_t K) {
+ return (A & ~KindMask) | K;
+ }
+
+ static uint16_t set_flags(uint16_t A, uint16_t F) {
+ return (A & ~FlagMask) | F;
+ }
+
+ // Test if A contains B.
+ static bool contains(uint16_t A, uint16_t B) {
+ if (type(A) != Code)
+ return false;
+ uint16_t KB = kind(B);
+ switch (kind(A)) {
+ case Func:
+ return KB == Block;
+ case Block:
+ return KB == Phi || KB == Stmt;
+ case Phi:
+ case Stmt:
+ return type(B) == Ref;
+ }
+ return false;
+ }
+ };
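+
+  // For example, the attribute word for a clobbering def reference could be
+  // composed as (an illustrative sketch):
+  //   uint16_t A = NodeAttrs::set_flags(
+  //       NodeAttrs::set_kind(NodeAttrs::set_type(0, NodeAttrs::Ref),
+  //                           NodeAttrs::Def),
+  //       NodeAttrs::Clobbering);
+  // giving type(A) == Ref, kind(A) == Def, and flags(A) == Clobbering.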
+
+ struct BuildOptions {
+ enum : unsigned {
+ None = 0x00,
+ KeepDeadPhis = 0x01, // Do not remove dead phis during build.
+ };
+ };
+
+ template <typename T> struct NodeAddr {
+ NodeAddr() = default;
+ NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
+
+    // Type cast (casting constructor). This conversion is the reason for
+    // having this class instead of std::pair.
+ template <typename S> NodeAddr(const NodeAddr<S> &NA)
+ : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+
+ bool operator== (const NodeAddr<T> &NA) const {
+ assert((Addr == NA.Addr) == (Id == NA.Id));
+ return Addr == NA.Addr;
+ }
+ bool operator!= (const NodeAddr<T> &NA) const {
+ return !operator==(NA);
+ }
+
+ T Addr = nullptr;
+ NodeId Id = 0;
+ };
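+
+  // The casting constructor above converts the address type while keeping
+  // the id, e.g. (a usage sketch):
+  //   NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(Id);
+  //   NodeAddr<RefNode*> RA = NA; // same Id, Addr static_cast to RefNode*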
+
+ struct NodeBase;
+
+ // Fast memory allocation and translation between node id and node address.
+ // This is really the same idea as the one underlying the "bump pointer
+ // allocator", the difference being in the translation. A node id is
+ // composed of two components: the index of the block in which it was
+ // allocated, and the index within the block. With the default settings,
+ // where the number of nodes per block is 4096, the node id (minus 1) is:
+ //
+ // bit position: 11 0
+ // +----------------------------+--------------+
+ // | Index of the block |Index in block|
+ // +----------------------------+--------------+
+ //
+ // The actual node id is the above plus 1, to avoid creating a node id of 0.
+ //
+ // This method significantly improved the build time, compared to using maps
+ // (std::unordered_map or DenseMap) to translate between pointers and ids.
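+  //
+  // For example, with the default 4096 nodes per block (BitsPerIndex = 12),
+  // the node at block 2, index 5 gets id ((2 << 12) | 5) + 1 = 8198; ptr(8198)
+  // then recovers block 8197 >> 12 = 2 and byte offset
+  // (8197 & 4095) * NodeMemSize = 160 within that block.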
+ struct NodeAllocator {
+ // Amount of storage for a single node.
+ enum { NodeMemSize = 32 };
+
+ NodeAllocator(uint32_t NPB = 4096)
+ : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
+ IndexMask((1 << BitsPerIndex)-1) {
+ assert(isPowerOf2_32(NPB));
+ }
+
+ NodeBase *ptr(NodeId N) const {
+ uint32_t N1 = N-1;
+ uint32_t BlockN = N1 >> BitsPerIndex;
+ uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
+ return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
+ }
+
+ NodeId id(const NodeBase *P) const;
+ NodeAddr<NodeBase*> New();
+ void clear();
+
+ private:
+ void startNewBlock();
+ bool needNewBlock();
+
+ uint32_t makeId(uint32_t Block, uint32_t Index) const {
+ // Add 1 to the id, to avoid the id of 0, which is treated as "null".
+ return ((Block << BitsPerIndex) | Index) + 1;
+ }
+
+ const uint32_t NodesPerBlock;
+ const uint32_t BitsPerIndex;
+ const uint32_t IndexMask;
+ char *ActiveEnd = nullptr;
+ std::vector<char*> Blocks;
+ using AllocatorTy = BumpPtrAllocatorImpl<MallocAllocator, 65536>;
+ AllocatorTy MemPool;
+ };
+
+ using RegisterSet = std::set<RegisterRef>;
+
+ struct TargetOperandInfo {
+ TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
+ virtual ~TargetOperandInfo() = default;
+
+ virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
+
+ const TargetInstrInfo &TII;
+ };
+
+ // Packed register reference. Only used for storage.
+ struct PackedRegisterRef {
+ RegisterId Reg;
+ uint32_t MaskId;
+ };
+
+ struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
+ LaneMaskIndex() = default;
+
+ LaneBitmask getLaneMaskForIndex(uint32_t K) const {
+ return K == 0 ? LaneBitmask::getAll() : get(K);
+ }
+
+ uint32_t getIndexForLaneMask(LaneBitmask LM) {
+ assert(LM.any());
+ return LM.all() ? 0 : insert(LM);
+ }
+
+ uint32_t getIndexForLaneMask(LaneBitmask LM) const {
+ assert(LM.any());
+ return LM.all() ? 0 : find(LM);
+ }
+ };
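+
+  // Note: the all-lanes mask is never stored; it is represented by index 0,
+  // e.g. getIndexForLaneMask(LaneBitmask::getAll()) == 0 and
+  // getLaneMaskForIndex(0) == LaneBitmask::getAll().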
+
+ struct NodeBase {
+ public:
+ // Make sure this is a POD.
+ NodeBase() = default;
+
+ uint16_t getType() const { return NodeAttrs::type(Attrs); }
+ uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
+ uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
+ NodeId getNext() const { return Next; }
+
+ uint16_t getAttrs() const { return Attrs; }
+ void setAttrs(uint16_t A) { Attrs = A; }
+ void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); }
+
+ // Insert node NA after "this" in the circular chain.
+ void append(NodeAddr<NodeBase*> NA);
+
+ // Initialize all members to 0.
+ void init() { memset(this, 0, sizeof *this); }
+
+ void setNext(NodeId N) { Next = N; }
+
+ protected:
+ uint16_t Attrs;
+ uint16_t Reserved;
+ NodeId Next; // Id of the next node in the circular chain.
+ // Definitions of nested types. Using anonymous nested structs would make
+ // this class definition clearer, but unnamed structs are not a part of
+ // the standard.
+ struct Def_struct {
+ NodeId DD, DU; // Ids of the first reached def and use.
+ };
+ struct PhiU_struct {
+ NodeId PredB; // Id of the predecessor block for a phi use.
+ };
+ struct Code_struct {
+ void *CP; // Pointer to the actual code.
+      NodeId FirstM, LastM; // Ids of the first and last members.
+ };
+ struct Ref_struct {
+ NodeId RD, Sib; // Ids of the reaching def and the sibling.
+ union {
+ Def_struct Def;
+ PhiU_struct PhiU;
+ };
+ union {
+ MachineOperand *Op; // Non-phi refs point to a machine operand.
+ PackedRegisterRef PR; // Phi refs store register info directly.
+ };
+ };
+
+ // The actual payload.
+ union {
+ Ref_struct Ref;
+ Code_struct Code;
+ };
+ };
+ // The allocator allocates chunks of 32 bytes for each node. The fact that
+ // each node takes 32 bytes in memory is used for fast translation between
+ // the node id and the node address.
+ static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
+ "NodeBase must be at most NodeAllocator::NodeMemSize bytes");
+
+ using NodeList = SmallVector<NodeAddr<NodeBase *>, 4>;
+ using NodeSet = std::set<NodeId>;
+
+ struct RefNode : public NodeBase {
+ RefNode() = default;
+
+ RegisterRef getRegRef(const DataFlowGraph &G) const;
+
+ MachineOperand &getOp() {
+ assert(!(getFlags() & NodeAttrs::PhiRef));
+ return *Ref.Op;
+ }
+
+ void setRegRef(RegisterRef RR, DataFlowGraph &G);
+ void setRegRef(MachineOperand *Op, DataFlowGraph &G);
+
+ NodeId getReachingDef() const {
+ return Ref.RD;
+ }
+ void setReachingDef(NodeId RD) {
+ Ref.RD = RD;
+ }
+
+ NodeId getSibling() const {
+ return Ref.Sib;
+ }
+ void setSibling(NodeId Sib) {
+ Ref.Sib = Sib;
+ }
+
+ bool isUse() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Use;
+ }
+
+ bool isDef() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Def;
+ }
+
+ template <typename Predicate>
+ NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
+ const DataFlowGraph &G);
+ NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
+ };
+
+ struct DefNode : public RefNode {
+ NodeId getReachedDef() const {
+ return Ref.Def.DD;
+ }
+ void setReachedDef(NodeId D) {
+ Ref.Def.DD = D;
+ }
+ NodeId getReachedUse() const {
+ return Ref.Def.DU;
+ }
+ void setReachedUse(NodeId U) {
+ Ref.Def.DU = U;
+ }
+
+ void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
+ };
+
+ struct UseNode : public RefNode {
+ void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
+ };
+
+ struct PhiUseNode : public UseNode {
+ NodeId getPredecessor() const {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ return Ref.PhiU.PredB;
+ }
+ void setPredecessor(NodeId B) {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ Ref.PhiU.PredB = B;
+ }
+ };
+
+ struct CodeNode : public NodeBase {
+ template <typename T> T getCode() const {
+ return static_cast<T>(Code.CP);
+ }
+ void setCode(void *C) {
+ Code.CP = C;
+ }
+
+ NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const;
+ NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const;
+ void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
+ void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
+ const DataFlowGraph &G);
+ void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
+
+ NodeList members(const DataFlowGraph &G) const;
+ template <typename Predicate>
+ NodeList members_if(Predicate P, const DataFlowGraph &G) const;
+ };
+
+ struct InstrNode : public CodeNode {
+ NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
+ };
+
+ struct PhiNode : public InstrNode {
+ MachineInstr *getCode() const {
+ return nullptr;
+ }
+ };
+
+ struct StmtNode : public InstrNode {
+ MachineInstr *getCode() const {
+ return CodeNode::getCode<MachineInstr*>();
+ }
+ };
+
+ struct BlockNode : public CodeNode {
+ MachineBasicBlock *getCode() const {
+ return CodeNode::getCode<MachineBasicBlock*>();
+ }
+
+ void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
+ };
+
+ struct FuncNode : public CodeNode {
+ MachineFunction *getCode() const {
+ return CodeNode::getCode<MachineFunction*>();
+ }
+
+ NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const;
+ NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
+ };
+
+ struct DataFlowGraph {
+ DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi);
+
+ NodeBase *ptr(NodeId N) const;
+ template <typename T> T ptr(NodeId N) const {
+ return static_cast<T>(ptr(N));
+ }
+
+ NodeId id(const NodeBase *P) const;
+
+ template <typename T> NodeAddr<T> addr(NodeId N) const {
+ return { ptr<T>(N), N };
+ }
+
+ NodeAddr<FuncNode*> getFunc() const { return Func; }
+ MachineFunction &getMF() const { return MF; }
+ const TargetInstrInfo &getTII() const { return TII; }
+ const TargetRegisterInfo &getTRI() const { return TRI; }
+ const PhysicalRegisterInfo &getPRI() const { return PRI; }
+ const MachineDominatorTree &getDT() const { return MDT; }
+ const MachineDominanceFrontier &getDF() const { return MDF; }
+ const RegisterAggr &getLiveIns() const { return LiveIns; }
+
+ struct DefStack {
+ DefStack() = default;
+
+ bool empty() const { return Stack.empty() || top() == bottom(); }
+
+ private:
+ using value_type = NodeAddr<DefNode *>;
+ struct Iterator {
+ using value_type = DefStack::value_type;
+
+ Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
+ Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
+
+ value_type operator*() const {
+ assert(Pos >= 1);
+ return DS.Stack[Pos-1];
+ }
+ const value_type *operator->() const {
+ assert(Pos >= 1);
+ return &DS.Stack[Pos-1];
+ }
+ bool operator==(const Iterator &It) const { return Pos == It.Pos; }
+ bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
+
+ private:
+ friend struct DefStack;
+
+ Iterator(const DefStack &S, bool Top);
+
+ // Pos-1 is the index in the StorageType object that corresponds to
+ // the top of the DefStack.
+ const DefStack &DS;
+ unsigned Pos;
+ };
+
+ public:
+ using iterator = Iterator;
+
+ iterator top() const { return Iterator(*this, true); }
+ iterator bottom() const { return Iterator(*this, false); }
+ unsigned size() const;
+
+ void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); }
+ void pop();
+ void start_block(NodeId N);
+ void clear_block(NodeId N);
+
+ private:
+ friend struct Iterator;
+
+ using StorageType = std::vector<value_type>;
+
+ bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
+ return (P.Addr == nullptr) && (N == 0 || P.Id == N);
+ }
+
+ unsigned nextUp(unsigned P) const;
+ unsigned nextDown(unsigned P) const;
+
+ StorageType Stack;
+ };
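+
+    // A typical traversal walks from the top of the stack downwards, e.g.
+    // (a usage sketch):
+    //   for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
+    //     NodeAddr<DefNode*> DA = *I;
+    //     ...
+    //   }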
+
+ // Make this std::unordered_map for speed of accessing elements.
+ // Map: Register (physical or virtual) -> DefStack
+ using DefStackMap = std::unordered_map<RegisterId, DefStack>;
+
+ void build(unsigned Options = BuildOptions::None);
+ void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void markBlock(NodeId B, DefStackMap &DefM);
+ void releaseBlock(NodeId B, DefStackMap &DefM);
+
+ PackedRegisterRef pack(RegisterRef RR) {
+ return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
+ }
+ PackedRegisterRef pack(RegisterRef RR) const {
+ return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
+ }
+ RegisterRef unpack(PackedRegisterRef PR) const {
+ return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId));
+ }
+
+ RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
+ RegisterRef makeRegRef(const MachineOperand &Op) const;
+ RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
+
+ NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+ NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA, bool Create);
+ NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+
+ NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+
+ NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const {
+ return BlockNodes.at(BB);
+ }
+
+ void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) {
+ unlinkUseDF(UA);
+ if (RemoveFromOwner)
+ removeFromOwner(UA);
+ }
+
+ void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
+ unlinkDefDF(DA);
+ if (RemoveFromOwner)
+ removeFromOwner(DA);
+ }
+
+ // Some useful filters.
+ template <uint16_t Kind>
+ static bool IsRef(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == Kind;
+ }
+
+ template <uint16_t Kind>
+ static bool IsCode(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Code &&
+ BA.Addr->getKind() == Kind;
+ }
+
+ static bool IsDef(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Def;
+ }
+
+ static bool IsUse(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Use;
+ }
+
+ static bool IsPhi(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Code &&
+ BA.Addr->getKind() == NodeAttrs::Phi;
+ }
+
+ static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
+ uint16_t Flags = DA.Addr->getFlags();
+ return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
+ }
+
+ private:
+ void reset();
+
+ RegisterSet getLandingPadLiveIns() const;
+
+ NodeAddr<NodeBase*> newNode(uint16_t Attrs);
+ NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
+ NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner,
+ RegisterRef RR, NodeAddr<BlockNode*> PredB,
+ uint16_t Flags = NodeAttrs::PhiRef);
+ NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
+ RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef);
+ NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner);
+ NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner,
+ MachineInstr *MI);
+ NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner,
+ MachineBasicBlock *BB);
+ NodeAddr<FuncNode*> newFunc(MachineFunction *MF);
+
+ template <typename Predicate>
+ std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
+ locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ Predicate P) const;
+
+ using BlockRefsMap = std::map<NodeId, RegisterSet>;
+
+ void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In);
+ void recordDefsForDF(BlockRefsMap &PhiM, NodeAddr<BlockNode*> BA);
+ void buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
+ NodeAddr<BlockNode*> BA);
+ void removeUnusedPhis();
+
+ void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
+ NodeAddr<T> TA, DefStack &DS);
+ template <typename Predicate> void linkStmtRefs(DefStackMap &DefM,
+ NodeAddr<StmtNode*> SA, Predicate P);
+ void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
+
+ void unlinkUseDF(NodeAddr<UseNode*> UA);
+ void unlinkDefDF(NodeAddr<DefNode*> DA);
+
+ void removeFromOwner(NodeAddr<RefNode*> RA) {
+ NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
+ IA.Addr->removeMember(RA, *this);
+ }
+
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo PRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const TargetOperandInfo &TOI;
+
+ RegisterAggr LiveIns;
+ NodeAddr<FuncNode*> Func;
+ NodeAllocator Memory;
+ // Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
+ std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
+ // Lane mask map.
+ LaneMaskIndex LMI;
+ }; // struct DataFlowGraph
+
+ template <typename Predicate>
+ NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P,
+ bool NextOnly, const DataFlowGraph &G) {
+ // Get the "Next" reference in the circular list that references RR and
+    // satisfies the predicate "P".
+ auto NA = G.addr<NodeBase*>(getNext());
+
+ while (NA.Addr != this) {
+ if (NA.Addr->getType() == NodeAttrs::Ref) {
+ NodeAddr<RefNode*> RA = NA;
+ if (RA.Addr->getRegRef(G) == RR && P(NA))
+ return NA;
+ if (NextOnly)
+ break;
+ NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ } else {
+ // We've hit the beginning of the chain.
+ assert(NA.Addr->getType() == NodeAttrs::Code);
+ NodeAddr<CodeNode*> CA = NA;
+ NA = CA.Addr->getFirstMember(G);
+ }
+ }
+ // Return the equivalent of "nullptr" if such a node was not found.
+ return NodeAddr<RefNode*>();
+ }
+
+ template <typename Predicate>
+ NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const {
+ NodeList MM;
+ auto M = getFirstMember(G);
+ if (M.Id == 0)
+ return MM;
+
+ while (M.Addr != this) {
+ if (P(M))
+ MM.push_back(M);
+ M = G.addr<NodeBase*>(M.Addr->getNext());
+ }
+ return MM;
+ }
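+
+  // For example, the defs of an instruction node IA can be collected with the
+  // IsDef filter from DataFlowGraph (a usage sketch):
+  //   NodeList Defs = IA.Addr->members_if(DataFlowGraph::IsDef, G);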
+
+ template <typename T>
+ struct Print {
+ Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
+
+ const T &Obj;
+ const DataFlowGraph &G;
+ };
+
+ template <typename T>
+ struct PrintNode : Print<NodeAddr<T>> {
+ PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
+ : Print<NodeAddr<T>>(x, g) {}
+ };
+
+ raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<PhiUseNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<StmtNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<InstrNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<BlockNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<FuncNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P);
+
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/RDFLiveness.h b/linux-x64/clang/include/llvm/CodeGen/RDFLiveness.h
new file mode 100644
index 0000000..d39d358
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/RDFLiveness.h
@@ -0,0 +1,175 @@
+//===- RDFLiveness.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Recalculate the liveness information given a data flow graph.
+// This includes block live-ins and kill flags.
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
+
+#include "RDFGraph.h"
+#include "RDFRegisters.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/LaneBitmask.h"
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineDominanceFrontier;
+class MachineDominatorTree;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+
+} // namespace llvm
+
+namespace llvm {
+namespace rdf {
+namespace detail {
+
+using NodeRef = std::pair<NodeId, LaneBitmask>;
+
+} // namespace detail
+} // namespace rdf
+} // namespace llvm
+
+namespace std {
+
+template <> struct hash<llvm::rdf::detail::NodeRef> {
+ std::size_t operator()(llvm::rdf::detail::NodeRef R) const {
+ return std::hash<llvm::rdf::NodeId>{}(R.first) ^
+ std::hash<llvm::LaneBitmask::Type>{}(R.second.getAsInteger());
+ }
+};
+
+} // namespace std
+
+namespace llvm {
+namespace rdf {
+
+ struct Liveness {
+ public:
+    // This is really a std::map, except that it supplies a non-trivially
+    // constructed default value for the element accessed via [].
+ struct LiveMapType {
+ LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {}
+
+ RegisterAggr &operator[] (MachineBasicBlock *B) {
+ return Map.emplace(B, Empty).first->second;
+ }
+
+ private:
+ RegisterAggr Empty;
+ std::map<MachineBasicBlock*,RegisterAggr> Map;
+ };
+
+ using NodeRef = detail::NodeRef;
+ using NodeRefSet = std::unordered_set<NodeRef>;
+ using RefMap = std::unordered_map<RegisterId, NodeRefSet>;
+
+ Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
+ : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
+ MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {}
+
+ NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
+ bool TopShadows, bool FullChain, const RegisterAggr &DefRRs);
+
+ NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
+ return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false,
+ false, NoRegs);
+ }
+
+ NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
+ return getAllReachingDefs(RefRR, RefA, false, false, NoRegs);
+ }
+
+ NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
+ const RegisterAggr &DefRRs);
+
+ NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
+ return getAllReachedUses(RefRR, DefA, NoRegs);
+ }
+
+ std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs);
+
+ NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode*> IA);
+
+ LiveMapType &getLiveMap() { return LiveMap; }
+ const LiveMapType &getLiveMap() const { return LiveMap; }
+
+ const RefMap &getRealUses(NodeId P) const {
+ auto F = RealUseMap.find(P);
+ return F == RealUseMap.end() ? Empty : F->second;
+ }
+
+ void computePhiInfo();
+ void computeLiveIns();
+ void resetLiveIns();
+ void resetKills();
+ void resetKills(MachineBasicBlock *B);
+
+ void trace(bool T) { Trace = T; }
+
+ private:
+ const DataFlowGraph &DFG;
+ const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo &PRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ LiveMapType LiveMap;
+ const RefMap Empty;
+ const RegisterAggr NoRegs;
+ bool Trace = false;
+
+ // Cache of mapping from node ids (for RefNodes) to the containing
+ // basic blocks. Not computing it each time for each node reduces
+ // the liveness calculation time by a large fraction.
+ DenseMap<NodeId, MachineBasicBlock *> NBMap;
+
+ // Phi information:
+ //
+ // RealUseMap
+ // map: NodeId -> (map: RegisterId -> NodeRefSet)
+ // phi id -> (map: register -> set of reached non-phi uses)
+ DenseMap<NodeId, RefMap> RealUseMap;
+
+ // Inverse iterated dominance frontier.
+ std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF;
+
+ // Live on entry.
+ std::map<MachineBasicBlock*,RefMap> PhiLON;
+
+ // Phi uses are considered to be located at the end of the block that
+ // they are associated with. The reaching def of a phi use dominates the
+ // block that the use corresponds to, but not the block that contains
+ // the phi itself. To include these uses in the liveness propagation (up
+ // the dominator tree), create a map: block -> set of uses live on exit.
+ std::map<MachineBasicBlock*,RefMap> PhiLOX;
+
+ MachineBasicBlock *getBlockWithRef(NodeId RN) const;
+ void traverse(MachineBasicBlock *B, RefMap &LiveIn);
+ void emptify(RefMap &M);
+
+ std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs,
+ unsigned Nest, unsigned MaxNest);
+ };
+
+ raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P);
+
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/RDFRegisters.h b/linux-x64/clang/include/llvm/CodeGen/RDFRegisters.h
new file mode 100644
index 0000000..c49b488
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/RDFRegisters.h
@@ -0,0 +1,279 @@
+//===- RDFRegisters.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+class MachineFunction;
+class raw_ostream;
+
+namespace rdf {
+
+ using RegisterId = uint32_t;
+
+ // Template class for a map translating uint32_t into arbitrary types.
+ // The map will act like an indexed set: upon insertion of a new object,
+ // it will automatically assign a new index to it. Index of 0 is treated
+ // as invalid and is never allocated.
+ template <typename T, unsigned N = 32>
+ struct IndexedSet {
+ IndexedSet() { Map.reserve(N); }
+
+ T get(uint32_t Idx) const {
+ // Index Idx corresponds to Map[Idx-1].
+ assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
+ return Map[Idx-1];
+ }
+
+ uint32_t insert(T Val) {
+ // Linear search.
+ auto F = llvm::find(Map, Val);
+ if (F != Map.end())
+ return F - Map.begin() + 1;
+ Map.push_back(Val);
+ return Map.size(); // Return actual_index + 1.
+ }
+
+ uint32_t find(T Val) const {
+ auto F = llvm::find(Map, Val);
+ assert(F != Map.end());
+ return F - Map.begin() + 1;
+ }
+
+ uint32_t size() const { return Map.size(); }
+
+ using const_iterator = typename std::vector<T>::const_iterator;
+
+ const_iterator begin() const { return Map.begin(); }
+ const_iterator end() const { return Map.end(); }
+
+ private:
+ std::vector<T> Map;
+ };
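+
+  // Usage sketch: indices are stable and 1-based, so inserting a value twice
+  // yields the same index.
+  //   IndexedSet<LaneBitmask> S;
+  //   uint32_t I = S.insert(LaneBitmask::getAll()); // I == 1
+  //   assert(S.insert(LaneBitmask::getAll()) == I);
+  //   assert(S.get(I) == LaneBitmask::getAll());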
+
+ struct RegisterRef {
+ RegisterId Reg = 0;
+ LaneBitmask Mask = LaneBitmask::getNone();
+
+ RegisterRef() = default;
+ explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
+ : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
+
+ operator bool() const {
+ return Reg != 0 && Mask.any();
+ }
+
+ bool operator== (const RegisterRef &RR) const {
+ return Reg == RR.Reg && Mask == RR.Mask;
+ }
+
+ bool operator!= (const RegisterRef &RR) const {
+ return !operator==(RR);
+ }
+
+ bool operator< (const RegisterRef &RR) const {
+ return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
+ }
+
+ size_t hash() const {
+ return std::hash<RegisterId>{}(Reg) ^
+ std::hash<LaneBitmask::Type>{}(Mask.getAsInteger());
+ }
+ };
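+
+  // For example, RegisterRef(R) with a nonzero R refers to all lanes of R,
+  // while RegisterRef(0) has an empty mask and converts to false.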
+
+
+ struct PhysicalRegisterInfo {
+ PhysicalRegisterInfo(const TargetRegisterInfo &tri,
+ const MachineFunction &mf);
+
+ static bool isRegMaskId(RegisterId R) {
+ return Register::isStackSlot(R);
+ }
+
+ RegisterId getRegMaskId(const uint32_t *RM) const {
+ return Register::index2StackSlot(RegMasks.find(RM));
+ }
+
+ const uint32_t *getRegMaskBits(RegisterId R) const {
+ return RegMasks.get(Register::stackSlot2Index(R));
+ }
+
+ bool alias(RegisterRef RA, RegisterRef RB) const {
+ if (!isRegMaskId(RA.Reg))
+ return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB);
+ return !isRegMaskId(RB.Reg) ? aliasRM(RB, RA) : aliasMM(RA, RB);
+ }
+
+ std::set<RegisterId> getAliasSet(RegisterId Reg) const;
+
+ RegisterRef getRefForUnit(uint32_t U) const {
+ return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
+ }
+
+ const BitVector &getMaskUnits(RegisterId MaskId) const {
+ return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
+ }
+
+ const BitVector &getUnitAliases(uint32_t U) const {
+ return AliasInfos[U].Regs;
+ }
+
+ RegisterRef mapTo(RegisterRef RR, unsigned R) const;
+ const TargetRegisterInfo &getTRI() const { return TRI; }
+
+ private:
+ struct RegInfo {
+ const TargetRegisterClass *RegClass = nullptr;
+ };
+ struct UnitInfo {
+ RegisterId Reg = 0;
+ LaneBitmask Mask;
+ };
+ struct MaskInfo {
+ BitVector Units;
+ };
+ struct AliasInfo {
+ BitVector Regs;
+ };
+
+ const TargetRegisterInfo &TRI;
+ IndexedSet<const uint32_t*> RegMasks;
+ std::vector<RegInfo> RegInfos;
+ std::vector<UnitInfo> UnitInfos;
+ std::vector<MaskInfo> MaskInfos;
+ std::vector<AliasInfo> AliasInfos;
+
+ bool aliasRR(RegisterRef RA, RegisterRef RB) const;
+ bool aliasRM(RegisterRef RR, RegisterRef RM) const;
+ bool aliasMM(RegisterRef RM, RegisterRef RN) const;
+ };
+
+ struct RegisterAggr {
+ RegisterAggr(const PhysicalRegisterInfo &pri)
+ : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
+ RegisterAggr(const RegisterAggr &RG) = default;
+
+ unsigned count() const { return Units.count(); }
+ bool empty() const { return Units.none(); }
+ bool hasAliasOf(RegisterRef RR) const;
+ bool hasCoverOf(RegisterRef RR) const;
+
+ bool operator==(const RegisterAggr &A) const {
+ return DenseMapInfo<BitVector>::isEqual(Units, A.Units);
+ }
+
+ static bool isCoverOf(RegisterRef RA, RegisterRef RB,
+ const PhysicalRegisterInfo &PRI) {
+ return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
+ }
+
+ RegisterAggr &insert(RegisterRef RR);
+ RegisterAggr &insert(const RegisterAggr &RG);
+ RegisterAggr &intersect(RegisterRef RR);
+ RegisterAggr &intersect(const RegisterAggr &RG);
+ RegisterAggr &clear(RegisterRef RR);
+ RegisterAggr &clear(const RegisterAggr &RG);
+
+ RegisterRef intersectWith(RegisterRef RR) const;
+ RegisterRef clearIn(RegisterRef RR) const;
+ RegisterRef makeRegRef() const;
+
+ size_t hash() const {
+ return DenseMapInfo<BitVector>::getHashValue(Units);
+ }
+
+ void print(raw_ostream &OS) const;
+
+ struct rr_iterator {
+ using MapType = std::map<RegisterId, LaneBitmask>;
+
+ private:
+ MapType Masks;
+ MapType::iterator Pos;
+ unsigned Index;
+ const RegisterAggr *Owner;
+
+ public:
+ rr_iterator(const RegisterAggr &RG, bool End);
+
+ RegisterRef operator*() const {
+ return RegisterRef(Pos->first, Pos->second);
+ }
+
+ rr_iterator &operator++() {
+ ++Pos;
+ ++Index;
+ return *this;
+ }
+
+ bool operator==(const rr_iterator &I) const {
+ assert(Owner == I.Owner);
+ (void)Owner;
+ return Index == I.Index;
+ }
+
+ bool operator!=(const rr_iterator &I) const {
+ return !(*this == I);
+ }
+ };
+
+ rr_iterator rr_begin() const {
+ return rr_iterator(*this, false);
+ }
+ rr_iterator rr_end() const {
+ return rr_iterator(*this, true);
+ }
+
+ private:
+ BitVector Units;
+ const PhysicalRegisterInfo &PRI;
+ };
+
+ // Optionally print the lane mask, if it is not ~0.
+ struct PrintLaneMaskOpt {
+ PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
+ LaneBitmask Mask;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
+
+ raw_ostream &operator<< (raw_ostream &OS, const RegisterAggr &A);
+} // end namespace rdf
+
+} // end namespace llvm
+
+namespace std {
+ template <> struct hash<llvm::rdf::RegisterRef> {
+ size_t operator()(llvm::rdf::RegisterRef A) const {
+ return A.hash();
+ }
+ };
+ template <> struct hash<llvm::rdf::RegisterAggr> {
+ size_t operator()(const llvm::rdf::RegisterAggr &A) const {
+ return A.hash();
+ }
+ };
+ template <> struct equal_to<llvm::rdf::RegisterAggr> {
+ bool operator()(const llvm::rdf::RegisterAggr &A,
+ const llvm::rdf::RegisterAggr &B) const {
+ return A == B;
+ }
+ };
+} // namespace std
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/ReachingDefAnalysis.h b/linux-x64/clang/include/llvm/CodeGen/ReachingDefAnalysis.h
index a599fb6..bcb48de 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -23,19 +23,54 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/CodeGen/LoopTraversal.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
namespace llvm {
class MachineBasicBlock;
class MachineInstr;
+/// Thin wrapper around "int" used to store reaching definitions,
+/// using an encoding that makes it compatible with TinyPtrVector.
+/// The 0th LSB is forced zero (and will be used for pointer union tagging);
+/// the 1st LSB is forced one (to make sure the value is non-zero).
+class ReachingDef {
+ uintptr_t Encoded;
+ friend struct PointerLikeTypeTraits<ReachingDef>;
+ explicit ReachingDef(uintptr_t Encoded) : Encoded(Encoded) {}
+
+public:
+ ReachingDef(std::nullptr_t) : Encoded(0) {}
+ ReachingDef(int Instr) : Encoded(((uintptr_t) Instr << 2) | 2) {}
+ operator int() const { return ((int) Encoded) >> 2; }
+};
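+
+// Encoding example: ReachingDef(3) stores ((3 << 2) | 2) == 14; operator int()
+// recovers (14 >> 2) == 3, and bit 0 stays clear for the pointer union tag
+// used by TinyPtrVector.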
+
+template<>
+struct PointerLikeTypeTraits<ReachingDef> {
+ static constexpr int NumLowBitsAvailable = 1;
+
+ static inline void *getAsVoidPointer(const ReachingDef &RD) {
+ return reinterpret_cast<void *>(RD.Encoded);
+ }
+
+ static inline ReachingDef getFromVoidPointer(void *P) {
+ return ReachingDef(reinterpret_cast<uintptr_t>(P));
+ }
+
+ static inline ReachingDef getFromVoidPointer(const void *P) {
+ return ReachingDef(reinterpret_cast<uintptr_t>(P));
+ }
+};
+
/// This class provides the reaching def analysis.
class ReachingDefAnalysis : public MachineFunctionPass {
private:
MachineFunction *MF;
const TargetRegisterInfo *TRI;
+ LoopTraversal::TraversalOrder TraversedMBBOrder;
unsigned NumRegUnits;
/// Instruction that defined each register, relative to the beginning of the
/// current basic block. When a LiveRegsDefInfo is used to represent a
@@ -54,12 +89,12 @@
/// The first instruction in each basic block is 0.
int CurInstr;
- /// Maps instructions to their instruction Ids, relative to the begining of
+ /// Maps instructions to their instruction Ids, relative to the beginning of
/// their basic blocks.
DenseMap<MachineInstr *, int> InstIds;
/// All reaching defs of a given RegUnit for a given MBB.
- using MBBRegUnitDefs = SmallVector<int, 1>;
+ using MBBRegUnitDefs = TinyPtrVector<ReachingDef>;
/// All reaching defs of all reg units for a given MBB
using MBBDefsInfo = std::vector<MBBRegUnitDefs>;
/// All reaching defs of all reg units for all MBBs
@@ -69,6 +104,9 @@
/// Default values are 'nothing happened a long time ago'.
const int ReachingDefDefaultVal = -(1 << 20);
+ using InstSet = SmallPtrSetImpl<MachineInstr*>;
+ using BlockSet = SmallPtrSetImpl<MachineBasicBlock*>;
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -86,30 +124,156 @@
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
+ MachineFunctionProperties::Property::NoVRegs).set(
+ MachineFunctionProperties::Property::TracksLiveness);
}
+ /// Re-run the analysis.
+ void reset();
+
+ /// Initialize data structures.
+ void init();
+
+ /// Traverse the machine function, mapping definitions.
+ void traverse();
+
/// Provides the instruction id of the closest reaching def instruction of
/// PhysReg that reaches MI, relative to the beginning of MI's basic block.
- int getReachingDef(MachineInstr *MI, int PhysReg);
+ int getReachingDef(MachineInstr *MI, MCRegister PhysReg) const;
+
+ /// Return whether A and B use the same def of PhysReg.
+ bool hasSameReachingDef(MachineInstr *A, MachineInstr *B,
+ MCRegister PhysReg) const;
+
+ /// Return whether the reaching def for MI also is live out of its parent
+ /// block.
+ bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const;
+
+ /// Return the local MI that produces the live out value for PhysReg, or
+ /// nullptr for a non-live out or non-local def.
+ MachineInstr *getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ MCRegister PhysReg) const;
+
+ /// If a single MachineInstr creates the reaching definition, then return it.
+ /// Otherwise return null.
+ MachineInstr *getUniqueReachingMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const;
+
+  /// If a single MachineInstr creates the reaching definition, for MI's operand
+ /// at Idx, then return it. Otherwise return null.
+ MachineInstr *getMIOperand(MachineInstr *MI, unsigned Idx) const;
+
+  /// If a single MachineInstr creates the reaching definition, for MI's MO,
+ /// then return it. Otherwise return null.
+ MachineInstr *getMIOperand(MachineInstr *MI, MachineOperand &MO) const;
+
+ /// Provide whether the register has been defined in the same basic block as,
+ /// and before, MI.
+ bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const;
+
+ /// Return whether the given register is used after MI, whether it's a local
+ /// use or a live out.
+ bool isRegUsedAfter(MachineInstr *MI, MCRegister PhysReg) const;
+
+ /// Return whether the given register is defined after MI.
+ bool isRegDefinedAfter(MachineInstr *MI, MCRegister PhysReg) const;
/// Provides the clearance - the number of instructions since the closest
/// reaching def instruction of PhysReg that reaches MI.
- int getClearance(MachineInstr *MI, MCPhysReg PhysReg);
+ int getClearance(MachineInstr *MI, MCRegister PhysReg) const;
+
+ /// Provides the uses, in the same block as MI, of register that MI defines.
+ /// This does not consider live-outs.
+ void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Uses) const;
+
+ /// Search MBB for a definition of PhysReg and insert it into Defs. If no
+  /// definition is found, recursively search the predecessor blocks for one.
+ void getLiveOuts(MachineBasicBlock *MBB, MCRegister PhysReg, InstSet &Defs,
+ BlockSet &VisitedBBs) const;
+ void getLiveOuts(MachineBasicBlock *MBB, MCRegister PhysReg,
+ InstSet &Defs) const;
+
+ /// For the given block, collect the instructions that use the live-in
+ /// value of the provided register. Return whether the value is still
+ /// live on exit.
+ bool getLiveInUses(MachineBasicBlock *MBB, MCRegister PhysReg,
+ InstSet &Uses) const;
+
+ /// Collect the users of the value stored in PhysReg, which is defined
+ /// by MI.
+ void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const;
+
+ /// Collect all possible definitions of the value stored in PhysReg, which is
+ /// used by MI.
+ void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Defs) const;
+
+ /// Return whether From can be moved forwards to just before To.
+ bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const;
+
+ /// Return whether From can be moved backwards to just after To.
+ bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const;
+
+ /// Assuming MI is dead, recursively search the incoming operands which are
+ /// killed by MI and collect those that would become dead.
+ void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const;
+
+ /// Return whether removing this instruction will have no effect on the
+ /// program, returning the redundant use-def chain.
+ bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const;
+
+ /// Return whether removing this instruction will have no effect on the
+ /// program, ignoring the possible effects on some instructions, returning
+ /// the redundant use-def chain.
+ bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove,
+ InstSet &Ignore) const;
+
+ /// Return whether a MachineInstr could be inserted at MI and safely define
+ /// the given register without affecting the program.
+ bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const;
+
+ /// Return whether a MachineInstr could be inserted at MI and safely define
+ /// the given register without affecting the program, ignoring any effects
+ /// on the provided instructions.
+ bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Ignore) const;
private:
/// Set up LiveRegs by merging predecessor live-out values.
- void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
+ void enterBasicBlock(MachineBasicBlock *MBB);
/// Update live-out values.
- void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
+ void leaveBasicBlock(MachineBasicBlock *MBB);
/// Process the given basic block.
void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
+ /// Process block that is part of a loop again.
+ void reprocessBasicBlock(MachineBasicBlock *MBB);
+
/// Update def-ages for registers defined by MI.
/// Also break dependencies on partial defs and undef uses.
void processDefs(MachineInstr *);
+
+ /// Utility function for isSafeToMoveForwards/Backwards.
+ template<typename Iterator>
+ bool isSafeToMove(MachineInstr *From, MachineInstr *To) const;
+
+ /// Return whether removing this instruction will have no effect on the
+ /// program, ignoring the possible effects on some instructions, returning
+ /// the redundant use-def chain.
+ bool isSafeToRemove(MachineInstr *MI, InstSet &Visited,
+ InstSet &ToRemove, InstSet &Ignore) const;
+
+ /// Provides the MI, from the given block, corresponding to the Id or a
+ /// nullptr if the id does not refer to the block.
+ MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId) const;
+
+ /// Provides the instruction of the closest reaching def instruction of
+  /// PhysReg that reaches MI, relative to the beginning of MI's basic block.
+ MachineInstr *getReachingLocalMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const;
};
} // namespace llvm
diff --git a/linux-x64/clang/include/llvm/CodeGen/RegAllocPBQP.h b/linux-x64/clang/include/llvm/CodeGen/RegAllocPBQP.h
index f7f9224..1ed5508 100644
--- a/linux-x64/clang/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/linux-x64/clang/include/llvm/CodeGen/RegAllocPBQP.h
@@ -22,6 +22,8 @@
#include "llvm/CodeGen/PBQP/Math.h"
#include "llvm/CodeGen/PBQP/ReductionRules.h"
#include "llvm/CodeGen/PBQP/Solution.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
@@ -96,13 +98,13 @@
AllowedRegVector() = default;
AllowedRegVector(AllowedRegVector &&) = default;
- AllowedRegVector(const std::vector<unsigned> &OptVec)
- : NumOpts(OptVec.size()), Opts(new unsigned[NumOpts]) {
+ AllowedRegVector(const std::vector<MCRegister> &OptVec)
+ : NumOpts(OptVec.size()), Opts(new MCRegister[NumOpts]) {
std::copy(OptVec.begin(), OptVec.end(), Opts.get());
}
unsigned size() const { return NumOpts; }
- unsigned operator[](size_t I) const { return Opts[I]; }
+ MCRegister operator[](size_t I) const { return Opts[I]; }
bool operator==(const AllowedRegVector &Other) const {
if (NumOpts != Other.NumOpts)
@@ -116,12 +118,12 @@
private:
unsigned NumOpts = 0;
- std::unique_ptr<unsigned[]> Opts;
+ std::unique_ptr<MCRegister[]> Opts;
};
inline hash_code hash_value(const AllowedRegVector &OptRegs) {
- unsigned *OStart = OptRegs.Opts.get();
- unsigned *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts;
+ MCRegister *OStart = OptRegs.Opts.get();
+ MCRegister *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts;
return hash_combine(OptRegs.NumOpts,
hash_combine_range(OStart, OEnd));
}
@@ -143,11 +145,11 @@
LiveIntervals &LIS;
MachineBlockFrequencyInfo &MBFI;
- void setNodeIdForVReg(unsigned VReg, GraphBase::NodeId NId) {
- VRegToNodeId[VReg] = NId;
+ void setNodeIdForVReg(Register VReg, GraphBase::NodeId NId) {
+ VRegToNodeId[VReg.id()] = NId;
}
- GraphBase::NodeId getNodeIdForVReg(unsigned VReg) const {
+ GraphBase::NodeId getNodeIdForVReg(Register VReg) const {
auto VRegItr = VRegToNodeId.find(VReg);
if (VRegItr == VRegToNodeId.end())
return GraphBase::invalidNodeId();
@@ -159,7 +161,7 @@
}
private:
- DenseMap<unsigned, GraphBase::NodeId> VRegToNodeId;
+ DenseMap<Register, GraphBase::NodeId> VRegToNodeId;
AllowedRegVecPool AllowedRegVecs;
};
@@ -197,8 +199,8 @@
NodeMetadata(NodeMetadata &&) = default;
NodeMetadata& operator=(NodeMetadata &&) = default;
- void setVReg(unsigned VReg) { this->VReg = VReg; }
- unsigned getVReg() const { return VReg; }
+ void setVReg(Register VReg) { this->VReg = VReg; }
+ Register getVReg() const { return VReg; }
void setAllowedRegs(GraphMetadata::AllowedRegVecRef AllowedRegs) {
this->AllowedRegs = std::move(AllowedRegs);
@@ -256,7 +258,7 @@
unsigned NumOpts = 0;
unsigned DeniedOpts = 0;
std::unique_ptr<unsigned[]> OptUnsafeEdges;
- unsigned VReg = 0;
+ Register VReg;
GraphMetadata::AllowedRegVecRef AllowedRegs;
#ifndef NDEBUG
diff --git a/linux-x64/clang/include/llvm/CodeGen/Register.h b/linux-x64/clang/include/llvm/CodeGen/Register.h
index 907c1a9..d7057cf 100644
--- a/linux-x64/clang/include/llvm/CodeGen/Register.h
+++ b/linux-x64/clang/include/llvm/CodeGen/Register.h
@@ -9,6 +9,7 @@
#ifndef LLVM_CODEGEN_REGISTER_H
#define LLVM_CODEGEN_REGISTER_H
+#include "llvm/MC/MCRegister.h"
#include <cassert>
namespace llvm {
@@ -19,42 +20,146 @@
unsigned Reg;
public:
- Register(unsigned Val = 0): Reg(Val) {}
+ constexpr Register(unsigned Val = 0): Reg(Val) {}
+ constexpr Register(MCRegister Val): Reg(Val) {}
- /// Return true if the specified register number is in the virtual register
- /// namespace.
- bool isVirtual() const {
- return int(Reg) < 0;
+ // Register numbers can represent physical registers, virtual registers, and
+ // sometimes stack slots. The unsigned values are divided into these ranges:
+ //
+ // 0 Not a register, can be used as a sentinel.
+ // [1;2^30) Physical registers assigned by TableGen.
+ // [2^30;2^31) Stack slots. (Rarely used.)
+ // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo.
+ //
+ // Further sentinels can be allocated from the small negative integers.
+ // DenseMapInfo<unsigned> uses -1u and -2u.
+ static_assert(std::numeric_limits<decltype(Reg)>::max() >= 0xFFFFFFFF,
+ "Reg isn't large enough to hold full range.");
+
+  /// isStackSlot - Sometimes it is useful to be able to store a non-negative
+ /// frame index in a variable that normally holds a register. isStackSlot()
+ /// returns true if Reg is in the range used for stack slots.
+ ///
+ /// FIXME: remove in favor of member.
+ static bool isStackSlot(unsigned Reg) {
+ return MCRegister::isStackSlot(Reg);
}
- /// Return true if the specified register number is in the physical register
- /// namespace.
- bool isPhysical() const {
- return int(Reg) > 0;
+ /// Return true if this is a stack slot.
+ bool isStack() const { return MCRegister::isStackSlot(Reg); }
+
+ /// Compute the frame index from a register value representing a stack slot.
+ static int stackSlot2Index(Register Reg) {
+ assert(Reg.isStack() && "Not a stack slot");
+ return int(Reg - MCRegister::FirstStackSlot);
}
- /// Convert a virtual register number to a 0-based index. The first virtual
- /// register in a function will get the index 0.
- unsigned virtRegIndex() const {
- assert(isVirtual() && "Not a virtual register");
- return Reg & ~(1u << 31);
+ /// Convert a non-negative frame index to a stack slot register value.
+ static Register index2StackSlot(int FI) {
+ assert(FI >= 0 && "Cannot hold a negative frame index.");
+ return Register(FI + MCRegister::FirstStackSlot);
+ }
+
+ /// Return true if the specified register number is in
+ /// the physical register namespace.
+ static bool isPhysicalRegister(unsigned Reg) {
+ return MCRegister::isPhysicalRegister(Reg);
+ }
+
+ /// Return true if the specified register number is in
+ /// the virtual register namespace.
+ static bool isVirtualRegister(unsigned Reg) {
+ return Reg & MCRegister::VirtualRegFlag && !isStackSlot(Reg);
+ }
+
+ /// Convert a virtual register number to a 0-based index.
+ /// The first virtual register in a function will get the index 0.
+ static unsigned virtReg2Index(Register Reg) {
+ assert(isVirtualRegister(Reg) && "Not a virtual register");
+ return Reg & ~MCRegister::VirtualRegFlag;
}
/// Convert a 0-based index to a virtual register number.
/// This is the inverse operation of VirtReg2IndexFunctor below.
static Register index2VirtReg(unsigned Index) {
- return Register(Index | (1u << 31));
+ assert(Index < (1u << 31) && "Index too large for virtual register range.");
+ return Index | MCRegister::VirtualRegFlag;
}
- operator unsigned() const {
+ /// Return true if the specified register number is in the virtual register
+ /// namespace.
+ bool isVirtual() const {
+ return isVirtualRegister(Reg);
+ }
+
+ /// Return true if the specified register number is in the physical register
+ /// namespace.
+ bool isPhysical() const {
+ return isPhysicalRegister(Reg);
+ }
+
+ /// Convert a virtual register number to a 0-based index. The first virtual
+ /// register in a function will get the index 0.
+ unsigned virtRegIndex() const {
+ return virtReg2Index(Reg);
+ }
+
+ constexpr operator unsigned() const {
return Reg;
}
- bool isValid() const {
- return Reg != 0;
+ unsigned id() const { return Reg; }
+
+ operator MCRegister() const {
+ return MCRegister(Reg);
+ }
+
+ /// Utility to check-convert this value to a MCRegister. The caller is
+ /// expected to have already validated that this Register is, indeed,
+ /// physical.
+ MCRegister asMCReg() const {
+ assert(Reg == MCRegister::NoRegister ||
+ MCRegister::isPhysicalRegister(Reg));
+ return MCRegister(Reg);
+ }
+
+ bool isValid() const { return Reg != MCRegister::NoRegister; }
+
+ /// Comparisons between register objects
+ bool operator==(const Register &Other) const { return Reg == Other.Reg; }
+ bool operator!=(const Register &Other) const { return Reg != Other.Reg; }
+ bool operator==(const MCRegister &Other) const { return Reg == Other.id(); }
+ bool operator!=(const MCRegister &Other) const { return Reg != Other.id(); }
+
+ /// Comparisons against register constants. E.g.
+ /// * R == AArch64::WZR
+ /// * R == 0
+ /// * R == VirtRegMap::NO_PHYS_REG
+ bool operator==(unsigned Other) const { return Reg == Other; }
+ bool operator!=(unsigned Other) const { return Reg != Other; }
+ bool operator==(int Other) const { return Reg == unsigned(Other); }
+ bool operator!=(int Other) const { return Reg != unsigned(Other); }
+ // MSVC requires that we explicitly declare these two as well.
+ bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); }
+ bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); }
+};
+
+// Provide DenseMapInfo for Register
+template<> struct DenseMapInfo<Register> {
+ static inline unsigned getEmptyKey() {
+ return DenseMapInfo<unsigned>::getEmptyKey();
+ }
+ static inline unsigned getTombstoneKey() {
+ return DenseMapInfo<unsigned>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const Register &Val) {
+ return DenseMapInfo<unsigned>::getHashValue(Val.id());
+ }
+ static bool isEqual(const Register &LHS, const Register &RHS) {
+ return DenseMapInfo<unsigned>::isEqual(LHS.id(), RHS.id());
}
};
}
-#endif
+#endif // ifndef LLVM_CODEGEN_REGISTER_H
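
Illustrative sketch (not part of the patch): the reworked Register API above folds the old static helpers into the value type itself. A minimal usage sketch, assuming an LLVM tree of this vintage; the constants follow the range table in the header comment:

    #include "llvm/CodeGen/Register.h"
    #include <cassert>
    using namespace llvm;

    void classifyRegisters() {
      Register None;                             // 0 is the "no register" sentinel
      assert(!None.isValid());

      Register V = Register::index2VirtReg(0);   // first virtual register
      assert(V.isVirtual() && V.virtRegIndex() == 0);

      Register S = Register::index2StackSlot(3); // stack slots share the space
      assert(S.isStack() && Register::stackSlot2Index(S) == 3);

      Register P(1);                             // small positive ids are physical
      assert(P.isPhysical());
      MCRegister M = P.asMCReg();                // checked narrowing to MCRegister
      (void)M;
    }
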
diff --git a/linux-x64/clang/include/llvm/CodeGen/RegisterClassInfo.h b/linux-x64/clang/include/llvm/CodeGen/RegisterClassInfo.h
index 14af5c4..25b310c 100644
--- a/linux-x64/clang/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/RegisterClassInfo.h
@@ -110,7 +110,7 @@
/// getLastCalleeSavedAlias - Returns the last callee saved register that
/// overlaps PhysReg, or 0 if Reg doesn't overlap a CalleeSavedAliases.
unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ assert(Register::isPhysicalRegister(PhysReg));
if (PhysReg < CalleeSavedAliases.size())
return CalleeSavedAliases[PhysReg];
return 0;
diff --git a/linux-x64/clang/include/llvm/CodeGen/RegisterPressure.h b/linux-x64/clang/include/llvm/CodeGen/RegisterPressure.h
index 5bbaa03..1deeb4d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/RegisterPressure.h
+++ b/linux-x64/clang/include/llvm/CodeGen/RegisterPressure.h
@@ -37,10 +37,10 @@
class RegisterClassInfo;
struct RegisterMaskPair {
- unsigned RegUnit; ///< Virtual register or register unit.
+ Register RegUnit; ///< Virtual register or register unit.
LaneBitmask LaneMask;
- RegisterMaskPair(unsigned RegUnit, LaneBitmask LaneMask)
+ RegisterMaskPair(Register RegUnit, LaneBitmask LaneMask)
: RegUnit(RegUnit), LaneMask(LaneMask) {}
};
@@ -129,6 +129,8 @@
bool operator==(const PressureChange &RHS) const {
return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc;
}
+
+ void dump() const;
};
/// List of PressureChanges in order of increasing, unique PSetID.
@@ -155,7 +157,7 @@
const_iterator begin() const { return &PressureChanges[0]; }
const_iterator end() const { return &PressureChanges[MaxPSets]; }
- void addPressureChange(unsigned RegUnit, bool IsDec,
+ void addPressureChange(Register RegUnit, bool IsDec,
const MachineRegisterInfo *MRI);
void dump(const TargetRegisterInfo &TRI) const;
@@ -248,6 +250,7 @@
bool operator!=(const RegPressureDelta &RHS) const {
return !operator==(RHS);
}
+ void dump() const;
};
/// A set of live virtual registers and physical register units.
@@ -272,24 +275,24 @@
RegSet Regs;
unsigned NumRegUnits;
- unsigned getSparseIndexFromReg(unsigned Reg) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits;
+ unsigned getSparseIndexFromReg(Register Reg) const {
+ if (Reg.isVirtual())
+ return Register::virtReg2Index(Reg) + NumRegUnits;
assert(Reg < NumRegUnits);
return Reg;
}
- unsigned getRegFromSparseIndex(unsigned SparseIndex) const {
+ Register getRegFromSparseIndex(unsigned SparseIndex) const {
if (SparseIndex >= NumRegUnits)
- return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits);
- return SparseIndex;
+ return Register::index2VirtReg(SparseIndex - NumRegUnits);
+ return Register(SparseIndex);
}
public:
void clear();
void init(const MachineRegisterInfo &MRI);
- LaneBitmask contains(unsigned Reg) const {
+ LaneBitmask contains(Register Reg) const {
unsigned SparseIndex = getSparseIndexFromReg(Reg);
RegSet::const_iterator I = Regs.find(SparseIndex);
if (I == Regs.end())
@@ -329,7 +332,7 @@
template<typename ContainerT>
void appendTo(ContainerT &To) const {
for (const IndexMaskPair &P : Regs) {
- unsigned Reg = getRegFromSparseIndex(P.Index);
+ Register Reg = getRegFromSparseIndex(P.Index);
if (P.LaneMask.any())
To.push_back(RegisterMaskPair(Reg, P.LaneMask));
}
@@ -387,7 +390,7 @@
LiveRegSet LiveRegs;
/// Set of vreg defs that start a live range.
- SparseSet<unsigned, VirtReg2IndexFunctor> UntiedDefs;
+ SparseSet<Register, VirtReg2IndexFunctor> UntiedDefs;
/// Live-through pressure.
std::vector<unsigned> LiveThruPressure;
@@ -529,7 +532,7 @@
return getDownwardPressure(MI, PressureResult, MaxPressureResult);
}
- bool hasUntiedDef(unsigned VirtReg) const {
+ bool hasUntiedDef(Register VirtReg) const {
return UntiedDefs.count(VirtReg);
}
@@ -545,9 +548,9 @@
/// after the current position.
SlotIndex getCurrSlot() const;
- void increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask,
+ void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
LaneBitmask NewMask);
- void decreaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask,
+ void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
LaneBitmask NewMask);
void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs);
@@ -558,9 +561,9 @@
void discoverLiveInOrOut(RegisterMaskPair Pair,
SmallVectorImpl<RegisterMaskPair> &LiveInOrOut);
- LaneBitmask getLastUsedLanes(unsigned RegUnit, SlotIndex Pos) const;
- LaneBitmask getLiveLanesAt(unsigned RegUnit, SlotIndex Pos) const;
- LaneBitmask getLiveThroughAt(unsigned RegUnit, SlotIndex Pos) const;
+ LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const;
+ LaneBitmask getLiveLanesAt(Register RegUnit, SlotIndex Pos) const;
+ LaneBitmask getLiveThroughAt(Register RegUnit, SlotIndex Pos) const;
};
void dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
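
The getSparseIndexFromReg/getRegFromSparseIndex pair above packs physical register units and virtual registers into one flat index space. A standalone restatement of the same mapping (NumRegUnits is target-dependent; the value here is only illustrative):

    #include "llvm/CodeGen/Register.h"
    using namespace llvm;

    static constexpr unsigned NumRegUnits = 128; // placeholder; per-target in reality

    unsigned toSparseIndex(Register Reg) {
      if (Reg.isVirtual())
        return Register::virtReg2Index(Reg) + NumRegUnits; // vregs after the units
      return Reg; // physical register units occupy [0, NumRegUnits)
    }

    Register fromSparseIndex(unsigned Idx) {
      if (Idx >= NumRegUnits)
        return Register::index2VirtReg(Idx - NumRegUnits);
      return Register(Idx);
    }
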
diff --git a/linux-x64/clang/include/llvm/CodeGen/RegisterScavenging.h b/linux-x64/clang/include/llvm/CodeGen/RegisterScavenging.h
index 9c48df8..4f48ea2 100644
--- a/linux-x64/clang/include/llvm/CodeGen/RegisterScavenging.h
+++ b/linux-x64/clang/include/llvm/CodeGen/RegisterScavenging.h
@@ -51,7 +51,7 @@
/// If non-zero, the specific register is currently being
/// scavenged. That is, it is spilled to this scavenging stack slot.
- unsigned Reg = 0;
+ Register Reg;
/// The instruction that restores the scavenged register from stack.
const MachineInstr *Restore = nullptr;
@@ -89,15 +89,6 @@
while (MBBI != I) forward();
}
- /// Invert the behavior of forward() on the current instruction (undo the
- /// changes to the available registers made by forward()).
- void unprocess();
-
- /// Unprocess instructions until you reach the provided iterator.
- void unprocess(MachineBasicBlock::iterator I) {
- while (MBBI != I) unprocess();
- }
-
/// Update internal register state and move MBB iterator backwards.
/// Contrary to unprocess() this method gives precise results even in the
/// absence of kill flags.
@@ -119,14 +110,14 @@
MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; }
/// Return if a specific register is currently used.
- bool isRegUsed(unsigned Reg, bool includeReserved = true) const;
+ bool isRegUsed(Register Reg, bool includeReserved = true) const;
/// Return all available registers in the register class in Mask.
BitVector getRegsAvailable(const TargetRegisterClass *RC);
/// Find an unused register of the specified register class.
/// Return 0 if none is found.
- unsigned FindUnusedReg(const TargetRegisterClass *RC) const;
+ Register FindUnusedReg(const TargetRegisterClass *RC) const;
/// Add a scavenging frame index.
void addScavengingFrameIndex(int FI) {
@@ -160,10 +151,10 @@
///
/// If \p AllowSpill is false, fail if a spill is required to make the
/// register available, and return NoRegister.
- unsigned scavengeRegister(const TargetRegisterClass *RC,
+ Register scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I, int SPAdj,
bool AllowSpill = true);
- unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj,
+ Register scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj,
bool AllowSpill = true) {
return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill);
}
@@ -177,17 +168,17 @@
///
/// If \p AllowSpill is false, fail if a spill is required to make the
/// register available, and return NoRegister.
- unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC,
+ Register scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
bool RestoreAfter, int SPAdj,
bool AllowSpill = true);
/// Tell the scavenger a register is used.
- void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll());
+ void setRegUsed(Register Reg, LaneBitmask LaneMask = LaneBitmask::getAll());
private:
/// Returns true if a register is reserved. It is never "unused".
- bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
+ bool isReserved(Register Reg) const { return MRI->isReserved(Reg); }
/// setUsed / setUnused - Mark the state of one or a number of register units.
///
@@ -203,16 +194,16 @@
void determineKillsAndDefs();
/// Add all Reg Units that Reg contains to BV.
- void addRegUnits(BitVector &BV, unsigned Reg);
+ void addRegUnits(BitVector &BV, MCRegister Reg);
/// Remove all Reg Units that \p Reg contains from \p BV.
- void removeRegUnits(BitVector &BV, unsigned Reg);
+ void removeRegUnits(BitVector &BV, MCRegister Reg);
/// Return the candidate register that is unused for the longest after
/// StartMI. UseMI is set to the instruction where the search stopped.
///
/// No more than InstrLimit instructions are inspected.
- unsigned findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ Register findSurvivorReg(MachineBasicBlock::iterator StartMI,
BitVector &Candidates,
unsigned InstrLimit,
MachineBasicBlock::iterator &UseMI);
@@ -225,7 +216,7 @@
/// Spill a register after position \p After and reload it before position
/// \p UseMI.
- ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+ ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
MachineBasicBlock::iterator Before,
MachineBasicBlock::iterator &UseMI);
};
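
A hedged sketch of how the retyped scavenger entry points compose, e.g. from a target's frame lowering; RS, RC, MBBI and SPAdj are assumed to be supplied by the surrounding pass:

    #include "llvm/CodeGen/RegisterScavenging.h"
    using namespace llvm;

    Register pickScratchReg(RegScavenger &RS, const TargetRegisterClass *RC,
                            MachineBasicBlock::iterator MBBI, int SPAdj) {
      // Cheap path: a register that is simply unused needs no spill code.
      if (Register R = RS.FindUnusedReg(RC))
        return R;
      // Otherwise scavenge one; with AllowSpill left at its default of true
      // this may insert a spill/reload around MBBI.
      return RS.scavengeRegister(RC, MBBI, SPAdj);
    }
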
diff --git a/linux-x64/clang/include/llvm/CodeGen/RegisterUsageInfo.h b/linux-x64/clang/include/llvm/CodeGen/RegisterUsageInfo.h
index 3355455..53982ce 100644
--- a/linux-x64/clang/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <cstdint>
#include <vector>
diff --git a/linux-x64/clang/include/llvm/CodeGen/ResourcePriorityQueue.h b/linux-x64/clang/include/llvm/CodeGen/ResourcePriorityQueue.h
index 81587a3..bd63dd8 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -16,15 +16,15 @@
#ifndef LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H
#define LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H
-#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
namespace llvm {
+ class DFAPacketizer;
+ class InstrItineraryData;
class ResourcePriorityQueue;
+ class SelectionDAGISel;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
/// Sorting functions for the Available queue.
struct resource_sort {
@@ -107,7 +107,6 @@
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
void initNumRegDefsLeft(SUnit *SU);
- void updateNumRegDefsLeft(SUnit *SU);
int regPressureDelta(SUnit *SU, bool RawPressure = false);
int rawRegPressureDelta (SUnit *SU, unsigned RCId);
diff --git a/linux-x64/clang/include/llvm/CodeGen/RuntimeLibcalls.h b/linux-x64/clang/include/llvm/CodeGen/RuntimeLibcalls.h
index f71f39e..86e24ca 100644
--- a/linux-x64/clang/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/linux-x64/clang/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_RUNTIMELIBCALLS_H
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/AtomicOrdering.h"
namespace llvm {
namespace RTLIB {
@@ -60,6 +61,10 @@
/// UNKNOWN_LIBCALL if there is none.
Libcall getSYNC(unsigned Opc, MVT VT);
+ /// Return the outline atomics value for the given opcode, atomic ordering
+ /// and type, or UNKNOWN_LIBCALL if there is none.
+ Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT);
+
/// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return
/// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
/// UNKNOWN_LIBCALL if there is none.
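
An illustrative query of the new outline-atomics hook; the opcode is assumed to be the ISD atomic opcode being lowered (fetch-and-add here), which is how backends with outlined atomic helpers would be expected to use it:

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/CodeGen/RuntimeLibcalls.h"
    #include "llvm/Support/AtomicOrdering.h"
    using namespace llvm;

    RTLIB::Libcall queryOutlinedLdAdd() {
      // Returns RTLIB::UNKNOWN_LIBCALL when no outlined helper exists for
      // this opcode/ordering/type combination.
      return RTLIB::getOUTLINE_ATOMIC(ISD::ATOMIC_LOAD_ADD,
                                      AtomicOrdering::AcquireRelease, MVT::i32);
    }
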
diff --git a/linux-x64/clang/include/llvm/CodeGen/ScheduleDAG.h b/linux-x64/clang/include/llvm/CodeGen/ScheduleDAG.h
index e004f3b..4c8d047 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ScheduleDAG.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ScheduleDAG.h
@@ -724,6 +724,10 @@
public:
ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
+ /// Add a SUnit without predecessors to the end of the topological order. It
+ /// also must be the first new node added to the DAG.
+ void AddSUnitWithoutPredecessors(const SUnit *SU);
+
/// Creates the initial topological ordering from the DAG to be scheduled.
void InitDAGTopologicalSorting();
diff --git a/linux-x64/clang/include/llvm/CodeGen/ScheduleDAGInstrs.h b/linux-x64/clang/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 3e3b604..50b186d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -34,6 +34,7 @@
namespace llvm {
+ class AAResults;
class LiveIntervals;
class MachineFrameInfo;
class MachineFunction;
@@ -57,7 +58,7 @@
: VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
unsigned getSparseSetIndex() const {
- return TargetRegisterInfo::virtReg2Index(VirtReg);
+ return Register::virtReg2Index(VirtReg);
}
};
@@ -173,7 +174,7 @@
/// Tracks the last instructions in this region using each virtual register.
VReg2SUnitOperIdxMultiMap CurrentVRegUses;
- AliasAnalysis *AAForDep = nullptr;
+ AAResults *AAForDep = nullptr;
/// Remember a generic side-effecting instruction as we proceed.
/// No other SU ever gets scheduled around it (except in the special
@@ -201,7 +202,7 @@
Value2SUsMap &loads, unsigned N);
/// Adds a chain edge between SUa and SUb, but only if both
- /// AliasAnalysis and Target fail to deny the dependency.
+ /// AAResults and Target fail to deny the dependency.
void addChainDependency(SUnit *SUa, SUnit *SUb,
unsigned Latency = 0);
@@ -267,6 +268,11 @@
return SU->SchedClass;
}
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(SUnit *SU, SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
/// Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator begin() const { return RegionBegin; }
@@ -306,7 +312,7 @@
/// If \p RPTracker is non-null, compute register pressure as a side effect.
/// The DAG builder is an efficient place to do it because it already visits
/// operands.
- void buildSchedGraph(AliasAnalysis *AA,
+ void buildSchedGraph(AAResults *AA,
RegPressureTracker *RPTracker = nullptr,
PressureDiffs *PDiffs = nullptr,
LiveIntervals *LIS = nullptr,
@@ -361,19 +367,12 @@
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
- /// Initializes register live-range state for updating kills.
- /// PostRA helper for rewriting kill flags.
- void startBlockForKills(MachineBasicBlock *BB);
-
- /// Toggles a register operand kill flag.
- ///
- /// Other adjustments may be made to the instruction if necessary. Return
- /// true if the operand has been deleted, false if not.
- void toggleKillFlag(MachineInstr &MI, MachineOperand &MO);
-
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
+
+ /// Returns true if the def register in \p MO has no uses.
+ bool deadDefHasNoUse(const MachineOperand &MO);
};
/// Creates a new SUnit and return a ptr to it.
@@ -389,10 +388,7 @@
/// Returns an existing SUnit for this MI, or nullptr.
inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
- DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
- if (I == MISUnitMap.end())
- return nullptr;
- return I->second;
+ return MISUnitMap.lookup(MI);
}
} // end namespace llvm
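
The getSUnit change above relies on a DenseMap property worth spelling out: lookup returns a value-initialized mapped type (nullptr for pointers) when the key is absent, which is exactly what the replaced find/end sequence computed. A minimal standalone illustration:

    #include "llvm/ADT/DenseMap.h"

    struct SUnitLike {};

    SUnitLike *getOrNull(const llvm::DenseMap<int, SUnitLike *> &M, int Key) {
      return M.lookup(Key); // nullptr when Key is not in M
    }
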
diff --git a/linux-x64/clang/include/llvm/CodeGen/ScheduleDFS.h b/linux-x64/clang/include/llvm/CodeGen/ScheduleDFS.h
index d60deab..2e0a30c 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ScheduleDFS.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ScheduleDFS.h
@@ -13,7 +13,6 @@
#ifndef LLVM_CODEGEN_SCHEDULEDFS_H
#define LLVM_CODEGEN_SCHEDULEDFS_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include <cassert>
@@ -22,6 +21,7 @@
namespace llvm {
+template <typename T> class ArrayRef;
class raw_ostream;
/// Represent the ILP of the subDAG rooted at a DAG node.
diff --git a/linux-x64/clang/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/linux-x64/clang/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index 37590f4..9f1101b 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -57,7 +57,7 @@
/// other instruction is available, issue it first.
/// * NoopHazard: issuing this instruction would break the program. If
/// some other instruction can be issued, do so, otherwise issue a noop.
- virtual HazardType getHazardType(SUnit *m, int Stalls = 0) {
+ virtual HazardType getHazardType(SUnit *, int Stalls = 0) {
return NoHazard;
}
@@ -114,6 +114,14 @@
// Default implementation: count it as a cycle.
AdvanceCycle();
}
+
+ /// EmitNoops - This callback is invoked when noops are added to the
+ /// instruction stream.
+ virtual void EmitNoops(unsigned Quantity) {
+ // Default implementation: count it as a cycle.
+ for (unsigned i = 0; i < Quantity; ++i)
+ EmitNoop();
+ }
};
} // end namespace llvm
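
A hypothetical subclass illustrating the new EmitNoops contract: since the default implementation calls EmitNoop() once per requested noop, a recognizer that only overrides EmitNoop() still observes every insertion:

    #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
    using namespace llvm;

    struct CountingHazardRecognizer : ScheduleHazardRecognizer {
      unsigned NoopsEmitted = 0;
      void EmitNoop() override {
        ++NoopsEmitted;
        AdvanceCycle(); // keep the default one-cycle-per-noop accounting
      }
    };
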
diff --git a/linux-x64/clang/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/linux-x64/clang/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index ac67f30..cefafe8 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -16,13 +16,13 @@
#define LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include <cassert>
#include <cstddef>
#include <cstring>
namespace llvm {
-class InstrItineraryData;
class ScheduleDAG;
class SUnit;
@@ -37,7 +37,7 @@
// bottom-up scheduler, then the scoreboard cycles are the inverse of the
// scheduler's cycles.
class Scoreboard {
- unsigned *Data = nullptr;
+ InstrStage::FuncUnits *Data = nullptr;
// The maximum number of cycles monitored by the Scoreboard. This
// value is determined based on the target itineraries to ensure
@@ -56,7 +56,7 @@
size_t getDepth() const { return Depth; }
- unsigned& operator[](size_t idx) const {
+ InstrStage::FuncUnits& operator[](size_t idx) const {
// Depth is expected to be a power-of-2.
assert(Depth && !(Depth & (Depth - 1)) &&
"Scoreboard was not initialized properly!");
@@ -67,7 +67,7 @@
void reset(size_t d = 1) {
if (!Data) {
Depth = d;
- Data = new unsigned[Depth];
+ Data = new InstrStage::FuncUnits[Depth];
}
memset(Data, 0, Depth * sizeof(Data[0]));
diff --git a/linux-x64/clang/include/llvm/CodeGen/SelectionDAG.h b/linux-x64/clang/include/llvm/CodeGen/SelectionDAG.h
index 12a9708..aeb488d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/SelectionDAG.h
+++ b/linux-x64/clang/include/llvm/CodeGen/SelectionDAG.h
@@ -26,10 +26,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -58,19 +55,24 @@
namespace llvm {
+class AAResults;
class BlockAddress;
+class BlockFrequencyInfo;
class Constant;
class ConstantFP;
class ConstantInt;
class DataLayout;
struct fltSemantics;
+class FunctionLoweringInfo;
class GlobalValue;
struct KnownBits;
+class LegacyDivergenceAnalysis;
class LLVMContext;
class MachineBasicBlock;
class MachineConstantPoolValue;
class MCSymbol;
class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
class SDDbgValue;
class SDDbgLabel;
class SelectionDAG;
@@ -235,6 +237,9 @@
/// whenever manipulating the DAG.
OptimizationRemarkEmitter *ORE;
+ ProfileSummaryInfo *PSI = nullptr;
+ BlockFrequencyInfo *BFI = nullptr;
+
/// The starting token.
SDNode EntryNode;
@@ -269,7 +274,14 @@
using CallSiteInfo = MachineFunction::CallSiteInfo;
using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
- DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
+
+ struct CallSiteDbgInfo {
+ CallSiteInfo CSInfo;
+ MDNode *HeapAllocSite = nullptr;
+ bool NoMerge = false;
+ };
+
+ DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
uint16_t NextPersistentId = 0;
@@ -319,6 +331,29 @@
virtual void anchor();
};
+ /// Helps to insert SDNodeFlags automatically while transforming the DAG.
+ /// Uses RAII to save and restore the flags in the current scope.
+ class FlagInserter {
+ SelectionDAG &DAG;
+ SDNodeFlags Flags;
+ FlagInserter *LastInserter;
+
+ public:
+ FlagInserter(SelectionDAG &SDAG, SDNodeFlags Flags)
+ : DAG(SDAG), Flags(Flags),
+ LastInserter(SDAG.getFlagInserter()) {
+ SDAG.setFlagInserter(this);
+ }
+ FlagInserter(SelectionDAG &SDAG, SDNode *N)
+ : FlagInserter(SDAG, N->getFlags()) {}
+
+ FlagInserter(const FlagInserter &) = delete;
+ FlagInserter &operator=(const FlagInserter &) = delete;
+ ~FlagInserter() { DAG.setFlagInserter(LastInserter); }
+
+ const SDNodeFlags getFlags() const { return Flags; }
+ };
+
/// When true, additional steps are taken to
/// ensure that getConstant() and similar functions return DAG nodes that
/// have legal types. This is important after type legalization since
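
A sketch of the FlagInserter RAII pattern introduced above, with DAG, N, DL, VT and the operands assumed to come from a surrounding DAG combine; the two-operand getNode overload takes no explicit flags and therefore reads them from the active inserter:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    SDValue rebuildWithFlags(SelectionDAG &DAG, SDNode *N, const SDLoc &DL,
                             EVT VT, SDValue A, SDValue B) {
      SelectionDAG::FlagInserter FI(DAG, N); // installs N's flags for this scope
      return DAG.getNode(ISD::FADD, DL, VT, A, B); // picks up N's flags
    } // FI's destructor restores the previously active inserter
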
@@ -382,7 +417,11 @@
Node->OperandList = nullptr;
}
void CreateTopologicalOrder(std::vector<SDNode*>& Order);
+
public:
+ // Maximum depth for recursive analysis such as computeKnownBits, etc.
+ static constexpr unsigned MaxRecursionDepth = 6;
+
explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level);
SelectionDAG(const SelectionDAG &) = delete;
SelectionDAG &operator=(const SelectionDAG &) = delete;
@@ -391,7 +430,8 @@
/// Prepare this SelectionDAG to process code in the given MachineFunction.
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis * Divergence);
+ LegacyDivergenceAnalysis * Divergence,
+ ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin);
void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) {
FLI = FuncInfo;
@@ -411,8 +451,24 @@
const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
- LLVMContext *getContext() const {return Context; }
+ LLVMContext *getContext() const { return Context; }
OptimizationRemarkEmitter &getORE() const { return *ORE; }
+ ProfileSummaryInfo *getPSI() const { return PSI; }
+ BlockFrequencyInfo *getBFI() const { return BFI; }
+
+ FlagInserter *getFlagInserter() { return Inserter; }
+ void setFlagInserter(FlagInserter *FI) { Inserter = FI; }
+
+ /// Just dump a dot graph to a user-provided path with the given title.
+ /// This doesn't open the dot viewer program, which helps visualization
+ /// outside a debugging session. FileName expects an absolute path; if it
+ /// is given without any path separators, the file is created in the
+ /// current directory. An error is emitted if the path is invalid.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dumpDotGraph(const Twine &FileName, const Twine &Title);
+#endif
/// Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
void viewGraph(const std::string &Title);
@@ -489,7 +545,7 @@
/// certain types of nodes together, or eliminating superfluous nodes. The
/// Level argument controls whether Combine is allowed to produce nodes and
/// types that are illegal on the target.
- void Combine(CombineLevel Level, AliasAnalysis *AA,
+ void Combine(CombineLevel Level, AAResults *AA,
CodeGenOpt::Level OptLevel);
/// This transforms the SelectionDAG into a SelectionDAG that
@@ -582,6 +638,8 @@
bool isTarget = false);
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL,
bool LegalTypes = true);
+ SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget = false);
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isOpaque = false) {
@@ -628,10 +686,9 @@
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT,
int64_t offset = 0, bool isTargetGA = false,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT,
- int64_t offset = 0,
- unsigned char TargetFlags = 0) {
+ int64_t offset = 0, unsigned TargetFlags = 0) {
return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags);
}
SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false);
@@ -639,36 +696,34 @@
return getFrameIndex(FI, VT, true);
}
SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false,
- unsigned char TargetFlags = 0);
- SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0);
+ SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) {
return getJumpTable(JTI, VT, true, TargetFlags);
}
- SDValue getConstantPool(const Constant *C, EVT VT,
- unsigned Align = 0, int Offs = 0, bool isT=false,
- unsigned char TargetFlags = 0);
+ SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align = None,
+ int Offs = 0, bool isT = false,
+ unsigned TargetFlags = 0);
SDValue getTargetConstantPool(const Constant *C, EVT VT,
- unsigned Align = 0, int Offset = 0,
- unsigned char TargetFlags = 0) {
+ MaybeAlign Align = None, int Offset = 0,
+ unsigned TargetFlags = 0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
}
SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT,
- unsigned Align = 0, int Offs = 0, bool isT=false,
- unsigned char TargetFlags = 0);
- SDValue getTargetConstantPool(MachineConstantPoolValue *C,
- EVT VT, unsigned Align = 0,
- int Offset = 0, unsigned char TargetFlags=0) {
+ MaybeAlign Align = None, int Offs = 0,
+ bool isT = false, unsigned TargetFlags = 0);
+ SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT,
+ MaybeAlign Align = None, int Offset = 0,
+ unsigned TargetFlags = 0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
}
SDValue getTargetIndex(int Index, EVT VT, int64_t Offset = 0,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
// When generating a branch to a BB, we don't in general know enough
// to provide debug info for the BB at that time, so keep this one around.
SDValue getBasicBlock(MachineBasicBlock *MBB);
- SDValue getBasicBlock(MachineBasicBlock *MBB, SDLoc dl);
SDValue getExternalSymbol(const char *Sym, EVT VT);
- SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT);
SDValue getTargetExternalSymbol(const char *Sym, EVT VT,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
SDValue getMCSymbol(MCSymbol *Sym, EVT VT);
SDValue getValueType(EVT);
@@ -677,12 +732,10 @@
SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label);
SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root,
MCSymbol *Label);
- SDValue getBlockAddress(const BlockAddress *BA, EVT VT,
- int64_t Offset = 0, bool isTarget = false,
- unsigned char TargetFlags = 0);
+ SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0,
+ bool isTarget = false, unsigned TargetFlags = 0);
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT,
- int64_t Offset = 0,
- unsigned char TargetFlags = 0) {
+ int64_t Offset = 0, unsigned TargetFlags = 0) {
return getBlockAddress(BA, VT, Offset, true, TargetFlags);
}
@@ -773,6 +826,20 @@
return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
+ // Return an ISD::SPLAT_VECTOR node, consisting of Op splatted to all
+ // elements.
+ SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op) {
+ if (Op.getOpcode() == ISD::UNDEF) {
+ assert((VT.getVectorElementType() == Op.getValueType() ||
+ (VT.isInteger() &&
+ VT.getVectorElementType().bitsLE(Op.getValueType()))) &&
+ "A splatted value must have a width equal or (for integers) "
+ "greater than the vector element type!");
+ return getNode(ISD::UNDEF, SDLoc(), VT);
+ }
+ return getNode(ISD::SPLAT_VECTOR, DL, VT, Op);
+ }
+
/// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
/// the shuffle node in input but with swapped operands.
///
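
For the splat helper above, a short sketch splatting a scalar into a scalable vector type (the helper folds an UNDEF operand straight to an UNDEF vector, as its body shows):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    SDValue splatToNxv4i32(SelectionDAG &DAG, const SDLoc &DL, SDValue Scalar) {
      // <vscale x 4 x i32> with every element equal to Scalar.
      return DAG.getSplatVector(MVT::nxv4i32, DL, Scalar);
    }
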
@@ -783,6 +850,11 @@
/// float type VT, by either extending or rounding (by truncation).
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
+ /// Convert Op, which must be a STRICT operation of float type, to the
+ /// float type VT, by either extending or rounding (by truncation).
+ std::pair<SDValue, SDValue>
+ getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT);
+
/// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
@@ -820,22 +892,28 @@
/// Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT);
+ /// Returns sum of the base pointer and offset.
+ /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default.
+ SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL,
+ const SDNodeFlags Flags = SDNodeFlags());
+ SDValue getMemBasePlusOffset(SDValue Base, SDValue Offset, const SDLoc &DL,
+ const SDNodeFlags Flags = SDNodeFlags());
+
/// Create an add instruction with appropriate flags when used for
/// addressing some offset of an object. i.e. if a load is split into multiple
/// components, create an add nuw from the base pointer to the offset.
- SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset) {
- EVT VT = Op.getValueType();
- return getObjectPtrOffset(SL, Op, getConstant(Offset, SL, VT));
+ SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset) {
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ return getMemBasePlusOffset(Ptr, Offset, SL, Flags);
}
- SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, SDValue Offset) {
- EVT VT = Op.getValueType();
-
+ SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, SDValue Offset) {
// The object itself can't wrap around the address space, so it shouldn't be
// possible for the adds of the offsets to the split parts to overflow.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- return getNode(ISD::ADD, SL, VT, Op, Offset, Flags);
+ return getMemBasePlusOffset(Ptr, Offset, SL, Flags);
}
/// Return a new CALLSEQ_START node, that starts new call frame, in which
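
The intended use of the retooled getObjectPtrOffset, sketched for the classic case of splitting a load in two; the nuw flag is justified because an in-bounds object cannot wrap around the address space:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    SDValue secondHalfPtr(SelectionDAG &DAG, const SDLoc &DL, SDValue BasePtr,
                          uint64_t HalfBytes) {
      // TypeSize::Fixed marks a byte offset that does not scale with vscale.
      return DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(HalfBytes));
    }
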
@@ -873,6 +951,14 @@
return getNode(ISD::UNDEF, SDLoc(), VT);
}
+ /// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
+ SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
+ assert(MulImm.getMinSignedBits() <= VT.getSizeInBits() &&
+ "Immediate does not fit VT");
+ return getNode(ISD::VSCALE, DL, VT,
+ getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
+ }
+
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);
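
A sketch of the new VSCALE constructor: materializing 16 * vscale as an i64 value, e.g. the byte size of one <vscale x 16 x i8> register:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    SDValue scalableRegBytes(SelectionDAG &DAG, const SDLoc &DL) {
      // Represents the runtime quantity 16 * RuntimeVL.
      return DAG.getVScale(DL, MVT::i64, APInt(64, 16));
    }
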
@@ -883,21 +969,31 @@
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDUse> Ops);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags());
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops);
SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
+
+ // Use flags from current flag inserter.
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3);
// Specialize based on number of operands.
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
- const SDNodeFlags Flags = SDNodeFlags());
+ const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, const SDNodeFlags Flags = SDNodeFlags());
+ SDValue N2, const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, SDValue N3,
- const SDNodeFlags Flags = SDNodeFlags());
+ SDValue N2, SDValue N3, const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2, SDValue N3, SDValue N4);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
@@ -921,18 +1017,50 @@
/// stack arguments from being clobbered.
SDValue getStackArgumentTokenFactor(SDValue Chain);
+ LLVM_ATTRIBUTE_DEPRECATED(SDValue getMemcpy(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVol, bool AlwaysInline,
+ bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo),
+ "Use the version that takes Align instead") {
+ return getMemcpy(Chain, dl, Dst, Src, Size, llvm::Align(Align), isVol,
+ AlwaysInline, isTailCall, DstPtrInfo, SrcPtrInfo);
+ }
+
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVol, bool AlwaysInline,
- bool isTailCall, MachinePointerInfo DstPtrInfo,
+ SDValue Size, Align Alignment, bool isVol,
+ bool AlwaysInline, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo);
+ LLVM_ATTRIBUTE_DEPRECATED(SDValue getMemmove(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo),
+ "Use the version that takes Align instead") {
+ return getMemmove(Chain, dl, Dst, Src, Size, llvm::Align(Align), isVol,
+ isTailCall, DstPtrInfo, SrcPtrInfo);
+ }
SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVol, bool isTailCall,
+ SDValue Size, Align Alignment, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo);
+ LLVM_ATTRIBUTE_DEPRECATED(SDValue getMemset(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo),
+ "Use the version that takes Align instead") {
+ return getMemset(Chain, dl, Dst, Src, Size, llvm::Align(Align), isVol,
+ isTailCall, DstPtrInfo);
+ }
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVol, bool isTailCall,
+ SDValue Size, Align Alignment, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo);
SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
@@ -955,13 +1083,17 @@
/// Helper function to make it easier to build SetCC's if you just have an
/// ISD::CondCode instead of an SDValue.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS,
- ISD::CondCode Cond) {
+ ISD::CondCode Cond, SDValue Chain = SDValue(),
+ bool IsSignaling = false) {
assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() &&
"Cannot compare scalars to vectors");
assert(LHS.getValueType().isVector() == VT.isVector() &&
"Cannot compare scalars to vectors");
assert(Cond != ISD::SETCC_INVALID &&
"Cannot create a setCC of an invalid node.");
+ if (Chain)
+ return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL,
+ {VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)});
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
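
With the extended getSetCC above, passing a chain selects the strict variants; a sketch returning both results of the two-result node (value 0 is the comparison, value 1 the outgoing chain):

    #include "llvm/CodeGen/SelectionDAG.h"
    #include <utility>
    using namespace llvm;

    std::pair<SDValue, SDValue> strictOLT(SelectionDAG &DAG, const SDLoc &DL,
                                          EVT VT, SDValue Chain, SDValue LHS,
                                          SDValue RHS) {
      SDValue Cmp = DAG.getSetCC(DL, VT, LHS, RHS, ISD::SETOLT, Chain,
                                 /*IsSignaling=*/false); // quiet: STRICT_FSETCC
      return {Cmp, Cmp.getValue(1)};
    }
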
@@ -993,7 +1125,8 @@
/// Try to simplify a floating-point binary operation into 1 of its operands
/// or a constant.
- SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y);
+ SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
+ SDNodeFlags Flags);
/// VAArg produces a result and token chain, and takes a pointer
/// and a source value as input.
@@ -1029,14 +1162,36 @@
/// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not
/// less than FIRST_TARGET_MEMORY_OPCODE.
SDValue getMemIntrinsicNode(
- unsigned Opcode, const SDLoc &dl, SDVTList VTList,
- ArrayRef<SDValue> Ops, EVT MemVT,
- MachinePointerInfo PtrInfo,
- unsigned Align = 0,
- MachineMemOperand::Flags Flags
- = MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
- unsigned Size = 0,
- const AAMDNodes &AAInfo = AAMDNodes());
+ unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
+ EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore,
+ uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes());
+
+ inline SDValue getMemIntrinsicNode(
+ unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
+ EVT MemVT, MachinePointerInfo PtrInfo, MaybeAlign Alignment = None,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore,
+ uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()) {
+ // Ensure that codegen never sees alignment 0
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo,
+ Alignment.getValueOr(getEVTAlign(MemVT)), Flags,
+ Size, AAInfo);
+ }
+
+ LLVM_ATTRIBUTE_DEPRECATED(
+ inline SDValue getMemIntrinsicNode(
+ unsigned Opcode, const SDLoc &dl, SDVTList VTList,
+ ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore,
+ uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()),
+ "") {
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo,
+ MaybeAlign(Alignment), Flags, Size, AAInfo);
+ }
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList,
ArrayRef<SDValue> Ops, EVT MemVT,
@@ -1048,6 +1203,12 @@
SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
int FrameIndex, int64_t Size, int64_t Offset = -1);
+ /// Creates a PseudoProbeSDNode with the function GUID `Guid`, the index
+ /// `Index` of the block it is probing, and the attributes `Attr` of the
+ /// probe.
+ SDValue getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, uint64_t Guid,
+ uint64_t Index, uint32_t Attr);
+
/// Create a MERGE_VALUES node from the given operands.
SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl);
@@ -1057,18 +1218,39 @@
/// This function will set the MOLoad flag on MMOFlags, but you can set it if
/// you want. The MOStore flag must not be set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
- MachinePointerInfo PtrInfo, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment = MaybeAlign(),
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr);
+ /// FIXME: Remove once transition to Align is over.
+ inline SDValue
+ getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes(),
+ const MDNode *Ranges = nullptr) {
+ return getLoad(VT, dl, Chain, Ptr, PtrInfo, MaybeAlign(Alignment), MMOFlags,
+ AAInfo, Ranges);
+ }
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
MachineMemOperand *MMO);
SDValue
getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain,
SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT,
- unsigned Alignment = 0,
+ MaybeAlign Alignment = MaybeAlign(),
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes());
+ /// FIXME: Remove once transition to Align is over.
+ inline SDValue
+ getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain,
+ SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes()) {
+ return getExtLoad(ExtType, dl, VT, Chain, Ptr, PtrInfo, MemVT,
+ MaybeAlign(Alignment), MMOFlags, AAInfo);
+ }
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT,
SDValue Chain, SDValue Ptr, EVT MemVT,
MachineMemOperand *MMO);
@@ -1076,10 +1258,32 @@
SDValue Offset, ISD::MemIndexedMode AM);
SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
- MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr);
+ inline SDValue getLoad(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo,
+ EVT MemVT, MaybeAlign Alignment = MaybeAlign(),
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) {
+ // Ensures that codegen never sees a None Alignment.
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT,
+ Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags, AAInfo,
+ Ranges);
+ }
+ /// FIXME: Remove once transition to Align is over.
+ inline SDValue
+ getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes(),
+ const MDNode *Ranges = nullptr) {
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT,
+ MaybeAlign(Alignment), MMOFlags, AAInfo, Ranges);
+ }
SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
EVT MemVT, MachineMemOperand *MMO);
@@ -1088,45 +1292,81 @@
///
/// This function will set the MOStore flag on MMOFlags, but you can set it if
/// you want. The MOLoad and MOInvariant flags must not be set.
+
SDValue
getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
- MachinePointerInfo PtrInfo, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo, Align Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes());
+ inline SDValue
+ getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ MachinePointerInfo PtrInfo, MaybeAlign Alignment = MaybeAlign(),
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes()) {
+ return getStore(Chain, dl, Val, Ptr, PtrInfo,
+ Alignment.getValueOr(getEVTAlign(Val.getValueType())),
+ MMOFlags, AAInfo);
+ }
+ /// FIXME: Remove once transition to Align is over.
+ inline SDValue
+ getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ MachinePointerInfo PtrInfo, unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes()) {
+ return getStore(Chain, dl, Val, Ptr, PtrInfo, MaybeAlign(Alignment),
+ MMOFlags, AAInfo);
+ }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
MachineMemOperand *MMO);
SDValue
getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
- MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo, EVT SVT, Align Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes());
+ inline SDValue
+ getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT SVT,
+ MaybeAlign Alignment = MaybeAlign(),
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes()) {
+ return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT,
+ Alignment.getValueOr(getEVTAlign(SVT)), MMOFlags,
+ AAInfo);
+ }
+ /// FIXME: Remove once transition to Align is over.
+ inline SDValue
+ getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes()) {
+ return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT,
+ MaybeAlign(Alignment), MMOFlags, AAInfo);
+ }
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, EVT SVT, MachineMemOperand *MMO);
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
- /// Returns sum of the base pointer and offset.
- SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL);
-
- SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
- SDValue Mask, SDValue Src0, EVT MemVT,
- MachineMemOperand *MMO, ISD::LoadExtType,
- bool IsExpanding = false);
+ SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base,
+ SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexedMode AM,
+ ISD::LoadExtType, bool IsExpanding = false);
+ SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM);
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val,
- SDValue Ptr, SDValue Mask, EVT MemVT,
- MachineMemOperand *MMO, bool IsTruncating = false,
- bool IsCompressing = false);
+ SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexedMode AM,
+ bool IsTruncating = false, bool IsCompressing = false);
+ SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM);
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
- ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy);
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
- ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
-
- /// Return (create a new or find existing) a target-specific node.
- /// TargetMemSDNode should be derived class from MemSDNode.
- template <class TargetMemSDNode>
- SDValue getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops,
- const SDLoc &dl, EVT MemVT,
- MachineMemOperand *MMO);
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType,
+ bool IsTruncating = false);
/// Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
@@ -1142,6 +1382,12 @@
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS,
unsigned DestAS);
+ /// Return a freeze using the SDLoc of the value operand.
+ SDValue getFreeze(SDValue V);
+
+ /// Return an AssertAlignSDNode.
+ SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A);
+
/// Return the specified value casted to
/// the target's desired shift amount type.
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
@@ -1185,6 +1431,9 @@
void setNodeMemRefs(MachineSDNode *N,
ArrayRef<MachineMemOperand *> NewMemRefs);
+ // Calculate divergence of node \p N based on its operands.
+ bool calculateDivergence(SDNode *N);
+
// Propagates the change in divergence to users
void updateDivergence(SDNode * N);
@@ -1205,8 +1454,6 @@
EVT VT2, ArrayRef<SDValue> Ops);
SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
EVT VT2, EVT VT3, ArrayRef<SDValue> Ops);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
- EVT VT2, SDValue Op1);
SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
EVT VT2, SDValue Op1, SDValue Op2);
SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs,
@@ -1264,8 +1511,13 @@
SDValue Operand, SDValue Subreg);
/// Get the specified node if it's already available, or else return NULL.
- SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags = SDNodeFlags());
+ SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
+ SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops);
+
+ /// Check if a node exists without modifying its flags.
+ bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops);
/// Creates a SDDbgValue node.
SDDbgValue *getDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N,
@@ -1339,7 +1591,14 @@
/// chain to the token factor. This ensures that the new memory node will have
/// the same relative memory dependency position as the old load. Returns the
/// new merged load chain.
- SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New);
+ SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain);
+
+ /// If an existing load has uses of its chain, create a token factor node with
+ /// that chain and the new memory node's chain and update users of the old
+ /// chain to the token factor. This ensures that the new memory node will have
+ /// the same relative memory dependency position as the old load. Returns the
+ /// new merged load chain.
+ SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
/// Topological-sort the AllNodes list and a
/// assign a unique node id for each node in the DAG based on their
@@ -1359,6 +1618,7 @@
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP format");
case MVT::f16: return APFloat::IEEEhalf();
+ case MVT::bf16: return APFloat::BFloat();
case MVT::f32: return APFloat::IEEEsingle();
case MVT::f64: return APFloat::IEEEdouble();
case MVT::f80: return APFloat::x87DoubleExtended();
@@ -1407,6 +1667,15 @@
void dump() const;
+ /// In most cases this function returns the ABI alignment for a given type,
+ /// except for illegal vector types where the alignment exceeds that of the
+ /// stack. In such cases we attempt to break the vector down to a legal type
+ /// and return the ABI alignment for that instead.
+ Align getReducedAlign(EVT VT, bool UseABI);
+
+ /// Create a stack temporary based on the size in bytes and the alignment
+ SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment);
+
/// Create a stack temporary, suitable for holding the specified value type.
/// If minAlign is specified, the slot size will have at least that alignment.
SDValue CreateStackTemporary(EVT VT, unsigned minAlign = 1);
@@ -1420,11 +1689,7 @@
const SDNode *N2);
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDNode *N1, SDNode *N2);
-
- SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- const ConstantSDNode *C1,
- const ConstantSDNode *C2);
+ ArrayRef<SDValue> Ops);
SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
@@ -1571,7 +1836,8 @@
/// for \p DemandedElts.
///
/// NOTE: The function will return true for a demanded splat of UNDEF values.
- bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts);
+ bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts,
+ unsigned Depth = 0);
/// Test whether \p V has a splatted value.
bool isSplatValue(SDValue V, bool AllowUndefs = false);
@@ -1583,14 +1849,34 @@
/// that element from the source vector.
SDValue getSplatValue(SDValue V);
+ /// If a SHL/SRA/SRL node \p V has a constant or splat constant shift amount
+ /// that is less than the element bit-width of the shift node, return it.
+ const APInt *getValidShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) const;
+
+ /// If a SHL/SRA/SRL node \p V has constant shift amounts that are all less
+ /// than the element bit-width of the shift node, return the minimum value.
+ const APInt *
+ getValidMinimumShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) const;
+
+ /// If a SHL/SRA/SRL node \p V has constant shift amounts that are all less
+ /// than the element bit-width of the shift node, return the maximum value.
+ const APInt *
+ getValidMaximumShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) const;
+
/// Match a binop + shuffle pyramid that represents a horizontal reduction
/// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node \p
/// Extract. The reduction must use one of the opcodes listed in \p
/// CandidateBinOps and on success \p BinOp will contain the matching opcode.
/// Returns the vector that is being reduced on, or SDValue() if a reduction
- /// was not matched.
+ /// was not matched. If \p AllowPartials is set then in the case of a
+ /// reduction pattern that only matches the first few stages, the extracted
+ /// subvector of the start of the reduction is returned.
SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps);
+ ArrayRef<ISD::NodeType> CandidateBinOps,
+ bool AllowPartials = false);
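For reference, the binop + shuffle pyramid being matched looks like this for a
v4i32 ISD::ADD reduction (an illustrative sketch; node names are not taken from
any particular dump):

    t0: v4i32 = <source vector>
    t1: v4i32 = vector_shuffle<2,3,u,u> t0, undef
    t2: v4i32 = add t0, t1
    t3: v4i32 = vector_shuffle<1,u,u,u> t2, undef
    t4: v4i32 = add t2, t3
    r:  i32   = extract_vector_elt t4, 0

A caller sketch, assuming Extract is the EXTRACT_VECTOR_ELT node above:

    ISD::NodeType BinOp;
    if (SDValue Src = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD}))
      ; // Src == t0 and BinOp == ISD::ADD for the full pyramid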
/// Utility function used by legalize and lowering to
/// "unroll" a vector operation by splitting out the scalars and operating
@@ -1612,14 +1898,28 @@
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
unsigned Bytes, int Dist) const;
- /// Infer alignment of a load / store address. Return 0 if
- /// it cannot be inferred.
- unsigned InferPtrAlignment(SDValue Ptr) const;
+ /// Infer alignment of a load / store address. Return None if it cannot be
+ /// inferred.
+ MaybeAlign InferPtrAlign(SDValue Ptr) const;
+
+ LLVM_ATTRIBUTE_DEPRECATED(inline unsigned InferPtrAlignment(SDValue Ptr)
+ const,
+ "Use InferPtrAlign instead") {
+ if (auto A = InferPtrAlign(Ptr))
+ return A->value();
+ return 0;
+ }
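Migrating callers follow the same pattern as the deprecated shim above; a
minimal sketch (DAG and Ptr are assumed from the surrounding lowering code, and
getValueOr is the usual llvm::Optional accessor of this era):

    MaybeAlign A = DAG.InferPtrAlign(Ptr);
    Align Known = A.getValueOr(Align(1)); // fall back to 1-byte alignment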
/// Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
std::pair<EVT, EVT> GetSplitDestVTs(const EVT &VT) const;
+ /// Compute the VTs needed for the low/hi parts of a type, dependent on an
+ /// enveloping VT that has been split into two identical pieces. Sets the
+ /// HiIsEmpty flag when the hi type has zero storage size.
+ std::pair<EVT, EVT> GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
+ bool *HiIsEmpty) const;
+
/// Split the vector with EXTRACT_SUBVECTOR using the provided
/// VTs and return the low/high part.
std::pair<SDValue, SDValue> SplitVector(const SDValue &N, const SDLoc &DL,
@@ -1642,38 +1942,82 @@
/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
SDValue WidenVector(const SDValue &N, const SDLoc &DL);
- /// Append the extracted elements from Start to Count out of the vector Op
- /// in Args. If Count is 0, all of the elements will be extracted.
+ /// Append the extracted elements from Start to Count out of the vector Op in
+ /// Args. If Count is 0, all of the elements will be extracted. The extracted
+ /// elements will have type EVT if it is provided, and otherwise their type
+ /// will be Op's element type.
void ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args,
- unsigned Start = 0, unsigned Count = 0);
+ unsigned Start = 0, unsigned Count = 0,
+ EVT EltVT = EVT());
/// Compute the default alignment value for the given type.
- unsigned getEVTAlignment(EVT MemoryVT) const;
+ Align getEVTAlign(EVT MemoryVT) const;
+ /// Compute the default alignment value for the given type.
+ /// FIXME: Remove once transition to Align is over.
+ inline unsigned getEVTAlignment(EVT MemoryVT) const {
+ return getEVTAlign(MemoryVT).value();
+ }
/// Test whether the given value is a constant int or similar node.
- SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N);
+ SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) const;
/// Test whether the given value is a constant FP or similar node.
- SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N);
+ SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) const;
/// \returns true if \p N is any kind of constant or build_vector of
/// constants, int or float. If a vector, it may not necessarily be a splat.
- inline bool isConstantValueOfAnyType(SDValue N) {
+ inline bool isConstantValueOfAnyType(SDValue N) const {
return isConstantIntBuildVectorOrConstantInt(N) ||
isConstantFPBuildVectorOrConstantFP(N);
}
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
- SDCallSiteInfo[CallNode] = std::move(CallInfo);
+ SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
}
CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
- auto I = SDCallSiteInfo.find(CallNode);
- if (I != SDCallSiteInfo.end())
- return std::move(I->second);
+ auto I = SDCallSiteDbgInfo.find(CallNode);
+ if (I != SDCallSiteDbgInfo.end())
+ return std::move(I->second).CSInfo;
return CallSiteInfo();
}
+ void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
+ SDCallSiteDbgInfo[Node].HeapAllocSite = MD;
+ }
+
+ /// Return the HeapAllocSite type associated with the SDNode, if it exists.
+ MDNode *getHeapAllocSite(const SDNode *Node) {
+ auto It = SDCallSiteDbgInfo.find(Node);
+ if (It == SDCallSiteDbgInfo.end())
+ return nullptr;
+ return It->second.HeapAllocSite;
+ }
+
+ void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) {
+ if (NoMerge)
+ SDCallSiteDbgInfo[Node].NoMerge = NoMerge;
+ }
+
+ bool getNoMergeSiteInfo(const SDNode *Node) {
+ auto I = SDCallSiteDbgInfo.find(Node);
+ if (I == SDCallSiteDbgInfo.end())
+ return false;
+ return I->second.NoMerge;
+ }
+
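A sketch of how a call lowering might populate and later query these side
tables (Node, CSInfo, and MD are assumed to come from the call being lowered):

    DAG.addCallSiteInfo(Node, std::move(CSInfo));   // argument-register info
    DAG.addHeapAllocSite(Node, MD);                 // !heapallocsite metadata
    DAG.addNoMergeSiteInfo(Node, /*NoMerge=*/true); // "nomerge" call attribute

    if (MDNode *HeapMD = DAG.getHeapAllocSite(Node))
      ; // the emitter can attach the allocation type to debug info
    bool NoMerge = DAG.getNoMergeSiteInfo(Node);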
+ /// Return the current function's default denormal handling kind for the given
+ /// floating point type.
+ DenormalMode getDenormalMode(EVT VT) const {
+ return MF->getDenormalMode(EVTToAPFloatSemantics(VT));
+ }
+
+ bool shouldOptForSize() const;
+
+ /// Get the (commutative) neutral element for the given opcode, if it exists.
+ SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDNodeFlags Flags);
+
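A usage sketch for these helpers (DL, VT, and Flags assumed from the caller;
the identity values in the comment reflect the usual neutral elements and are
an assumption, not quoted from the implementation):

    if (DAG.shouldOptForSize())
      ; // prefer smaller expansions
    DenormalMode DM = DAG.getDenormalMode(MVT::f32);
    // e.g. 0 for ISD::ADD or 1 for ISD::MUL, if a neutral element exists:
    SDValue Id = DAG.getNeutralElement(ISD::ADD, DL, VT, Flags);
    if (!Id)
      ; // no neutral element for this opcode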
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
@@ -1712,8 +2056,10 @@
std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes;
StringMap<SDNode*> ExternalSymbols;
- std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
+ std::map<std::pair<std::string, unsigned>, SDNode *> TargetExternalSymbols;
DenseMap<MCSymbol *, SDNode *> MCSymbols;
+
+ FlagInserter *Inserter = nullptr;
};
template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
@@ -1728,41 +2074,6 @@
}
};
-template <class TargetMemSDNode>
-SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
- ArrayRef<SDValue> Ops,
- const SDLoc &dl, EVT MemVT,
- MachineMemOperand *MMO) {
- /// Compose node ID and try to find an existing node.
- FoldingSetNodeID ID;
- unsigned Opcode =
- TargetMemSDNode(dl.getIROrder(), DebugLoc(), VTs, MemVT, MMO).getOpcode();
- ID.AddInteger(Opcode);
- ID.AddPointer(VTs.VTs);
- for (auto& Op : Ops) {
- ID.AddPointer(Op.getNode());
- ID.AddInteger(Op.getResNo());
- }
- ID.AddInteger(MemVT.getRawBits());
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- ID.AddInteger(getSyntheticNodeSubclassData<TargetMemSDNode>(
- dl.getIROrder(), VTs, MemVT, MMO));
-
- void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
- cast<TargetMemSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
-
- /// Existing node was not found. Create a new one.
- auto *N = newSDNode<TargetMemSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- MemVT, MMO);
- createOperands(N, Ops);
- CSEMap.InsertNode(N, IP);
- InsertNode(N);
- return SDValue(N, 0);
-}
-
} // end namespace llvm
#endif // LLVM_CODEGEN_SELECTIONDAG_H
diff --git a/linux-x64/clang/include/llvm/CodeGen/SelectionDAGISel.h b/linux-x64/clang/include/llvm/CodeGen/SelectionDAGISel.h
index 147c325..84bb11e 100644
--- a/linux-x64/clang/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/linux-x64/clang/include/llvm/CodeGen/SelectionDAGISel.h
@@ -18,26 +18,21 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/Pass.h"
#include <memory>
namespace llvm {
- class FastISel;
- class SelectionDAGBuilder;
- class SDValue;
- class MachineRegisterInfo;
- class MachineBasicBlock;
- class MachineFunction;
- class MachineInstr;
- class OptimizationRemarkEmitter;
- class TargetLowering;
- class TargetLibraryInfo;
- class FunctionLoweringInfo;
- class ScheduleHazardRecognizer;
- class SwiftErrorValueTracking;
- class GCFunctionInfo;
- class ScheduleDAGSDNodes;
- class LoadInst;
+class AAResults;
+class SelectionDAGBuilder;
+class SDValue;
+class MachineRegisterInfo;
+class MachineFunction;
+class OptimizationRemarkEmitter;
+class TargetLowering;
+class TargetLibraryInfo;
+class FunctionLoweringInfo;
+class SwiftErrorValueTracking;
+class GCFunctionInfo;
+class ScheduleDAGSDNodes;
/// SelectionDAGISel - This is the common base class used for SelectionDAG-based
/// pattern-matching instruction selectors.
@@ -45,13 +40,13 @@
public:
TargetMachine &TM;
const TargetLibraryInfo *LibInfo;
- FunctionLoweringInfo *FuncInfo;
+ std::unique_ptr<FunctionLoweringInfo> FuncInfo;
SwiftErrorValueTracking *SwiftError;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
SelectionDAG *CurDAG;
- SelectionDAGBuilder *SDB;
- AliasAnalysis *AA;
+ std::unique_ptr<SelectionDAGBuilder> SDB;
+ AAResults *AA;
GCFunctionInfo *GFI;
CodeGenOpt::Level OptLevel;
const TargetInstrInfo *TII;
@@ -75,7 +70,7 @@
bool runOnMachineFunction(MachineFunction &MF) override;
- virtual void EmitFunctionEntryCode() {}
+ virtual void emitFunctionEntryCode() {}
/// PreprocessISelDAG - This hook allows targets to hack on the graph before
/// instruction selection starts.
@@ -162,6 +157,7 @@
OPC_EmitMergeInputChains1_1,
OPC_EmitMergeInputChains1_2,
OPC_EmitCopyToReg,
+ OPC_EmitCopyToReg2,
OPC_EmitNodeXForm,
OPC_EmitNode,
// Space-optimized forms that implicitly encode number of result VTs.
@@ -247,6 +243,11 @@
virtual StringRef getIncludePathForIndex(unsigned index) {
llvm_unreachable("Tblgen should generate the implementation of this!");
}
+
+ bool shouldOptForSize(const MachineFunction *MF) const {
+ return CurDAG->shouldOptForSize();
+ }
+
public:
// Calls to these predicates are generated by tblgen.
bool CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
@@ -301,24 +302,27 @@
return false;
}
+ /// Return whether the node may raise an FP exception.
+ bool mayRaiseFPException(SDNode *Node) const;
+
bool isOrEquivalentToAdd(const SDNode *N) const;
private:
// Calls to these functions are generated by tblgen.
- void Select_INLINEASM(SDNode *N, bool Branch);
+ void Select_INLINEASM(SDNode *N);
void Select_READ_REGISTER(SDNode *Op);
void Select_WRITE_REGISTER(SDNode *Op);
void Select_UNDEF(SDNode *N);
void CannotYetSelect(SDNode *N);
+ void Select_FREEZE(SDNode *N);
+
private:
void DoInstructionSelection();
SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
ArrayRef<SDValue> Ops, unsigned EmitNodeInfo);
- SDNode *MutateStrictFPToFP(SDNode *Node, unsigned NewOpc);
-
/// Prepares the landing pad to take incoming values or do other EH
/// personality specific tasks. Returns true if the block should be
/// instruction selected, false if no code should be emitted for it.
diff --git a/linux-x64/clang/include/llvm/CodeGen/SelectionDAGNodes.h b/linux-x64/clang/include/llvm/CodeGen/SelectionDAGNodes.h
index 5aab964..3d12240 100644
--- a/linux-x64/clang/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/linux-x64/clang/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -30,6 +30,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
@@ -42,6 +43,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/TypeSize.h"
#include <algorithm>
#include <cassert>
#include <climits>
@@ -83,29 +85,42 @@
/// Node predicates
- /// If N is a BUILD_VECTOR node whose elements are all the same constant or
- /// undefined, return true and return the constant value in \p SplatValue.
- bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
+/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
+/// same constant or undefined, return true and return the constant value in
+/// \p SplatValue.
+bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
- /// Return true if the specified node is a BUILD_VECTOR where all of the
- /// elements are ~0 or undef.
- bool isBuildVectorAllOnes(const SDNode *N);
+/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
+/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
+/// true, it only checks BUILD_VECTOR.
+bool isConstantSplatVectorAllOnes(const SDNode *N,
+ bool BuildVectorOnly = false);
- /// Return true if the specified node is a BUILD_VECTOR where all of the
- /// elements are 0 or undef.
- bool isBuildVectorAllZeros(const SDNode *N);
+/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
+/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
+/// only checks BUILD_VECTOR.
+bool isConstantSplatVectorAllZeros(const SDNode *N,
+ bool BuildVectorOnly = false);
- /// Return true if the specified node is a BUILD_VECTOR node of all
- /// ConstantSDNode or undef.
- bool isBuildVectorOfConstantSDNodes(const SDNode *N);
+/// Return true if the specified node is a BUILD_VECTOR where all of the
+/// elements are ~0 or undef.
+bool isBuildVectorAllOnes(const SDNode *N);
- /// Return true if the specified node is a BUILD_VECTOR node of all
- /// ConstantFPSDNode or undef.
- bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
+/// Return true if the specified node is a BUILD_VECTOR where all of the
+/// elements are 0 or undef.
+bool isBuildVectorAllZeros(const SDNode *N);
- /// Return true if the node has at least one operand and all operands of the
- /// specified node are ISD::UNDEF.
- bool allOperandsUndef(const SDNode *N);
+/// Return true if the specified node is a BUILD_VECTOR node of all
+/// ConstantSDNode or undef.
+bool isBuildVectorOfConstantSDNodes(const SDNode *N);
+
+/// Return true if the specified node is a BUILD_VECTOR node of all
+/// ConstantFPSDNode or undef.
+bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
+
+/// Return true if the node has at least one operand and all operands of the
+/// specified node are ISD::UNDEF.
+bool allOperandsUndef(const SDNode *N);
} // end namespace ISD
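A quick sketch of the now SPLAT_VECTOR-aware predicates (N is an SDNode*):

    APInt Splat;
    if (ISD::isConstantSplatVector(N, Splat) && Splat.isNullValue())
      ; // splat of zero, via either BUILD_VECTOR or SPLAT_VECTOR
    // Restrict the all-ones check to fixed BUILD_VECTORs only:
    if (ISD::isConstantSplatVectorAllOnes(N, /*BuildVectorOnly=*/true))
      ; // every element is ~0 or undef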
@@ -170,12 +185,16 @@
}
/// Returns the size of the value in bits.
- unsigned getValueSizeInBits() const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getValueSizeInBits() const {
return getValueType().getSizeInBits();
}
- unsigned getScalarValueSizeInBits() const {
- return getValueType().getScalarType().getSizeInBits();
+ uint64_t getScalarValueSizeInBits() const {
+ return getValueType().getScalarType().getFixedSizeInBits();
}
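Since getValueSizeInBits() now returns TypeSize, callers that may see scalable
vectors should branch on the scalable bit, roughly:

    TypeSize Size = V.getValueSizeInBits();
    if (Size.isScalable()) {
      uint64_t MinBits = Size.getKnownMinSize(); // runtime size = vscale * min
    } else {
      uint64_t Bits = Size.getFixedSize();
    }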
// Forwarding methods - These forward to the corresponding methods in SDNode.
@@ -351,11 +370,6 @@
/// the backend.
struct SDNodeFlags {
private:
- // This bit is used to determine if the flags are in a defined state.
- // Flag bits can only be masked out during intersection if the masking flags
- // are defined.
- bool AnyDefined : 1;
-
bool NoUnsignedWrap : 1;
bool NoSignedWrap : 1;
bool Exact : 1;
@@ -363,7 +377,6 @@
bool NoInfs : 1;
bool NoSignedZeros : 1;
bool AllowReciprocal : 1;
- bool VectorReduction : 1;
bool AllowContract : 1;
bool ApproximateFuncs : 1;
bool AllowReassociation : 1;
@@ -378,11 +391,10 @@
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
- : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
- Exact(false), NoNaNs(false), NoInfs(false),
- NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
+ : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
+ NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
AllowContract(false), ApproximateFuncs(false),
- AllowReassociation(false), NoFPExcept(true) {}
+ AllowReassociation(false), NoFPExcept(false) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
@@ -395,60 +407,18 @@
setAllowReassociation(FPMO.hasAllowReassoc());
}
- /// Sets the state of the flags to the defined state.
- void setDefined() { AnyDefined = true; }
- /// Returns true if the flags are in a defined state.
- bool isDefined() const { return AnyDefined; }
-
// These are mutators for each flag.
- void setNoUnsignedWrap(bool b) {
- setDefined();
- NoUnsignedWrap = b;
- }
- void setNoSignedWrap(bool b) {
- setDefined();
- NoSignedWrap = b;
- }
- void setExact(bool b) {
- setDefined();
- Exact = b;
- }
- void setNoNaNs(bool b) {
- setDefined();
- NoNaNs = b;
- }
- void setNoInfs(bool b) {
- setDefined();
- NoInfs = b;
- }
- void setNoSignedZeros(bool b) {
- setDefined();
- NoSignedZeros = b;
- }
- void setAllowReciprocal(bool b) {
- setDefined();
- AllowReciprocal = b;
- }
- void setVectorReduction(bool b) {
- setDefined();
- VectorReduction = b;
- }
- void setAllowContract(bool b) {
- setDefined();
- AllowContract = b;
- }
- void setApproximateFuncs(bool b) {
- setDefined();
- ApproximateFuncs = b;
- }
- void setAllowReassociation(bool b) {
- setDefined();
- AllowReassociation = b;
- }
- void setFPExcept(bool b) {
- setDefined();
- NoFPExcept = !b;
- }
+ void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+ void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+ void setExact(bool b) { Exact = b; }
+ void setNoNaNs(bool b) { NoNaNs = b; }
+ void setNoInfs(bool b) { NoInfs = b; }
+ void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+ void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+ void setAllowContract(bool b) { AllowContract = b; }
+ void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
+ void setAllowReassociation(bool b) { AllowReassociation = b; }
+ void setNoFPExcept(bool b) { NoFPExcept = b; }
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -458,22 +428,14 @@
bool hasNoInfs() const { return NoInfs; }
bool hasNoSignedZeros() const { return NoSignedZeros; }
bool hasAllowReciprocal() const { return AllowReciprocal; }
- bool hasVectorReduction() const { return VectorReduction; }
bool hasAllowContract() const { return AllowContract; }
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
- bool hasFPExcept() const { return !NoFPExcept; }
+ bool hasNoFPExcept() const { return NoFPExcept; }
- bool isFast() const {
- return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
- AllowContract && ApproximateFuncs && AllowReassociation;
- }
-
- /// Clear any flags in this flag set that aren't also set in Flags.
- /// If the given Flags are undefined then don't do anything.
+ /// Clear any flags in this flag set that aren't also set in Flags. All
+ /// flags will be cleared if Flags are undefined.
void intersectWith(const SDNodeFlags Flags) {
- if (!Flags.isDefined())
- return;
NoUnsignedWrap &= Flags.NoUnsignedWrap;
NoSignedWrap &= Flags.NoSignedWrap;
Exact &= Flags.Exact;
@@ -481,7 +443,6 @@
NoInfs &= Flags.NoInfs;
NoSignedZeros &= Flags.NoSignedZeros;
AllowReciprocal &= Flags.AllowReciprocal;
- VectorReduction &= Flags.VectorReduction;
AllowContract &= Flags.AllowContract;
ApproximateFuncs &= Flags.ApproximateFuncs;
AllowReassociation &= Flags.AllowReassociation;
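With the AnyDefined tracking gone, a default-constructed SDNodeFlags simply
means "all flags off", so intersecting with one clears everything; a small
sketch of the new semantics:

    SDNodeFlags Flags;
    Flags.setNoNaNs(true);
    Flags.setAllowContract(true);
    Flags.intersectWith(SDNodeFlags()); // acts as an all-off mask now
    assert(!Flags.hasNoNaNs() && !Flags.hasAllowContract());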
@@ -548,16 +509,24 @@
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
+ friend class MaskedLoadStoreSDNode;
+ friend class MaskedGatherScatterSDNode;
uint16_t : NumMemSDNodeBits;
- uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode
+ // This storage is shared between disparate class hierarchies to hold an
+ // enumeration specific to the class hierarchy in use.
+ // LSBaseSDNode => enum ISD::MemIndexedMode
+ // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
+ // MaskedGatherScatterSDNode => enum ISD::MemIndexType
+ uint16_t AddressingMode : 3;
};
enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
class LoadSDNodeBitfields {
friend class LoadSDNode;
friend class MaskedLoadSDNode;
+ friend class MaskedGatherSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -568,6 +537,7 @@
class StoreSDNodeBitfields {
friend class StoreSDNode;
friend class MaskedStoreSDNode;
+ friend class MaskedScatterSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -654,6 +624,15 @@
/// \<target\>ISD namespace).
bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
+ /// Test if this node has a target-specific opcode that may raise
+ /// FP exceptions (in the \<target\>ISD namespace and greater than
+ /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
+ /// opcodes are currently automatically considered to possibly raise
+ /// FP exceptions as well.
+ bool isTargetStrictFPOpcode() const {
+ return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
+ }
+
/// Test if this node has a target-specific
/// memory-referencing opcode (in the \<target\>ISD namespace and
/// greater than FIRST_TARGET_MEMORY_OPCODE).
@@ -680,32 +659,11 @@
switch (NodeType) {
default:
return false;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP16_TO_FP:
+ case ISD::STRICT_FP_TO_FP16:
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
return true;
}
}
@@ -990,7 +948,6 @@
const SDNodeFlags getFlags() const { return Flags; }
void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
- bool isFast() { return Flags.isFast(); }
/// Clear any flags in this node that aren't also set in Flags.
/// If Flags is not in a defined state then this has no effect.
@@ -1011,7 +968,11 @@
}
/// Returns MVT::getSizeInBits(getValueType(ResNo)).
- unsigned getValueSizeInBits(unsigned ResNo) const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getValueSizeInBits(unsigned ResNo) const {
return getValueType(ResNo).getSizeInBits();
}
@@ -1019,6 +980,9 @@
value_iterator value_begin() const { return ValueList; }
value_iterator value_end() const { return ValueList+NumValues; }
+ iterator_range<value_iterator> values() const {
+ return llvm::make_range(value_begin(), value_end());
+ }
/// Return the opcode of this operation for printing.
std::string getOperationName(const SelectionDAG *G = nullptr) const;
@@ -1298,12 +1262,14 @@
bool writeMem() const { return MMO->isStore(); }
/// Returns alignment and volatility of the memory access
- unsigned getOriginalAlignment() const {
- return MMO->getBaseAlignment();
+ Align getOriginalAlign() const { return MMO->getBaseAlign(); }
+ Align getAlign() const { return MMO->getAlign(); }
+ LLVM_ATTRIBUTE_DEPRECATED(unsigned getOriginalAlignment() const,
+ "Use getOriginalAlign() instead") {
+ return MMO->getBaseAlign().value();
}
- unsigned getAlignment() const {
- return MMO->getAlignment();
- }
+ // FIXME: Remove once transition to getAlign is over.
+ unsigned getAlignment() const { return MMO->getAlign().value(); }
/// Return the SubclassData value, without HasDebugValue. This contains an
/// encoding of the volatile flag, as well as bits used by subclasses. This
@@ -1346,6 +1312,17 @@
/// store occurs.
AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
+ /// Return true if the memory operation ordering is Unordered or higher.
+ bool isAtomic() const { return MMO->isAtomic(); }
+
+ /// Returns true if the memory operation doesn't imply any ordering
+ /// constraints on surrounding memory operations beyond the normal memory
+ /// aliasing rules.
+ bool isUnordered() const { return MMO->isUnordered(); }
+
+ /// Returns true if the memory operation is neither atomic nor volatile.
+ bool isSimple() const { return !isAtomic() && !isVolatile(); }
+
/// Return the type of the in-memory value.
EVT getMemoryVT() const { return MemoryVT; }
@@ -1371,8 +1348,18 @@
}
const SDValue &getChain() const { return getOperand(0); }
+
const SDValue &getBasePtr() const {
- return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
+ switch (getOpcode()) {
+ case ISD::STORE:
+ case ISD::MSTORE:
+ return getOperand(2);
+ case ISD::MGATHER:
+ case ISD::MSCATTER:
+ return getOperand(3);
+ default:
+ return getOperand(1);
+ }
}
// Methods to support isa and dyn_cast
@@ -1568,6 +1555,8 @@
uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
return Value->getLimitedValue(Limit);
}
+ MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
+ Align getAlignValue() const { return Value->getAlignValue(); }
bool isOne() const { return Value->isOne(); }
bool isNullValue() const { return Value->isZero(); }
@@ -1702,16 +1691,16 @@
const GlobalValue *TheGlobal;
int64_t Offset;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
const GlobalValue *GA, EVT VT, int64_t o,
- unsigned char TF);
+ unsigned TF);
public:
const GlobalValue *getGlobal() const { return TheGlobal; }
int64_t getOffset() const { return Offset; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
// Return the address space this GlobalAddress belongs to.
unsigned getAddressSpace() const;
@@ -1774,20 +1763,46 @@
}
};
+/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
+/// the index of the basic block being probed. A pseudo probe serves as a
+/// placeholder and will be removed at the end of compilation. It has no
+/// operands because we do not want instruction selection to deal with any.
+class PseudoProbeSDNode : public SDNode {
+ friend class SelectionDAG;
+ uint64_t Guid;
+ uint64_t Index;
+ uint32_t Attributes;
+
+ PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
+ SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
+ : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
+ Attributes(Attr) {}
+
+public:
+ uint64_t getGuid() const { return Guid; }
+ uint64_t getIndex() const { return Index; }
+ uint32_t getAttributes() const { return Attributes; }
+
+ // Methods to support isa and dyn_cast
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::PSEUDO_PROBE;
+ }
+};
+
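A minimal consumer sketch (emitProbe is a hypothetical sink, not an LLVM API):

    if (const auto *PP = dyn_cast<PseudoProbeSDNode>(N))
      emitProbe(PP->getGuid(), PP->getIndex(), PP->getAttributes());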
class JumpTableSDNode : public SDNode {
friend class SelectionDAG;
int JTI;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
- JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF)
+ JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
: SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
}
public:
int getIndex() const { return JTI; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::JumpTable ||
@@ -1803,23 +1818,23 @@
MachineConstantPoolValue *MachineCPVal;
} Val;
int Offset; // It's a MachineConstantPoolValue if top bit is set.
- unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
- unsigned char TargetFlags;
+ Align Alignment; // Minimum alignment requirement of CP.
+ unsigned TargetFlags;
ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
- unsigned Align, unsigned char TF)
- : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
- DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
- TargetFlags(TF) {
+ Align Alignment, unsigned TF)
+ : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
+ DebugLoc(), getSDVTList(VT)),
+ Offset(o), Alignment(Alignment), TargetFlags(TF) {
assert(Offset >= 0 && "Offset is too large");
Val.ConstVal = c;
}
- ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
- EVT VT, int o, unsigned Align, unsigned char TF)
- : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
- DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
- TargetFlags(TF) {
+ ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
+ Align Alignment, unsigned TF)
+ : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
+ DebugLoc(), getSDVTList(VT)),
+ Offset(o), Alignment(Alignment), TargetFlags(TF) {
assert(Offset >= 0 && "Offset is too large");
Val.MachineCPVal = v;
Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
@@ -1846,8 +1861,8 @@
// Return the alignment of this constant pool object, which is either 0 (for
// default alignment) or the desired value.
- unsigned getAlignment() const { return Alignment; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ Align getAlign() const { return Alignment; }
+ unsigned getTargetFlags() const { return TargetFlags; }
Type *getType() const;
@@ -1861,16 +1876,16 @@
class TargetIndexSDNode : public SDNode {
friend class SelectionDAG;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
int Index;
int64_t Offset;
public:
- TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF)
- : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
- TargetFlags(TF), Index(Idx), Offset(Ofs) {}
+ TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
+ : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
+ TargetFlags(TF), Index(Idx), Offset(Ofs) {}
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
int getIndex() const { return Index; }
int64_t getOffset() const { return Offset; }
@@ -1934,6 +1949,33 @@
/// the vector width and set the bits where elements are undef.
SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
+ /// Find the shortest repeating sequence of values in the build vector.
+ ///
+ /// e.g. { u, X, u, X, u, u, X, u } -> { X }
+ /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
+ ///
+ /// Currently this must be a power-of-2 build vector.
+ /// The DemandedElts mask indicates the elements that must be present,
+ /// undemanded elements in Sequence may be null (SDValue()). If passed a
+ /// non-null UndefElements bitvector, it will resize it to match the original
+ /// vector width and set the bits where elements are undef. If result is
+ /// false, Sequence will be empty.
+ bool getRepeatedSequence(const APInt &DemandedElts,
+ SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements = nullptr) const;
+
+ /// Find the shortest repeating sequence of values in the build vector.
+ ///
+ /// e.g. { u, X, u, X, u, u, X, u } -> { X }
+ /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
+ ///
+ /// Currently this must be a power-of-2 build vector.
+ /// If passed a non-null UndefElements bitvector, it will resize it to match
+ /// the original vector width and set the bits where elements are undef.
+ /// If result is false, Sequence will be empty.
+ bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements = nullptr) const;
+
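A caller sketch, assuming BV is a BuildVectorSDNode* building
{ X, Y, u, Y, u, u, X, u }:

    SmallVector<SDValue, 16> Seq;
    BitVector Undefs;
    if (BV->getRepeatedSequence(Seq, &Undefs)) {
      // Seq == { X, Y }; Undefs has bits set for the u lanes
      unsigned Period = Seq.size();
    }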
/// Returns the demanded splatted constant or null if this is not a constant
/// splat.
///
@@ -2027,13 +2069,13 @@
class RegisterSDNode : public SDNode {
friend class SelectionDAG;
- unsigned Reg;
+ Register Reg;
- RegisterSDNode(unsigned reg, EVT VT)
+ RegisterSDNode(Register reg, EVT VT)
: SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
public:
- unsigned getReg() const { return Reg; }
+ Register getReg() const { return Reg; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Register;
@@ -2063,17 +2105,17 @@
const BlockAddress *BA;
int64_t Offset;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
- int64_t o, unsigned char Flags)
+ int64_t o, unsigned Flags)
: SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
BA(ba), Offset(o), TargetFlags(Flags) {}
public:
const BlockAddress *getBlockAddress() const { return BA; }
int64_t getOffset() const { return Offset; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BlockAddress ||
@@ -2104,15 +2146,16 @@
friend class SelectionDAG;
const char *Symbol;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
- ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT)
- : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol,
- 0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {}
+ ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
+ : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
+ DebugLoc(), getSDVTList(VT)),
+ Symbol(Sym), TargetFlags(TF) {}
public:
const char *getSymbol() const { return Symbol; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ExternalSymbol ||
@@ -2181,8 +2224,6 @@
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
LSBaseSDNodeBits.AddressingMode = AM;
assert(getAddressingMode() == AM && "Value truncated");
- assert((!MMO->isAtomic() || MMO->isVolatile()) &&
- "use an AtomicSDNode instead for non-volatile atomics");
}
const SDValue &getOffset() const {
@@ -2272,19 +2313,35 @@
friend class SelectionDAG;
MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
- const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ const DebugLoc &dl, SDVTList VTs,
+ ISD::MemIndexedMode AM, EVT MemVT,
MachineMemOperand *MMO)
- : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
-
- // MaskedLoadSDNode (Chain, ptr, mask, passthru)
- // MaskedStoreSDNode (Chain, data, ptr, mask)
- // Mask is a vector of i1 elements
- const SDValue &getBasePtr() const {
- return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2);
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = AM;
+ assert(getAddressingMode() == AM && "Value truncated");
}
- const SDValue &getMask() const {
+
+ // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
+ // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
+ // Mask is a vector of i1 elements
+ const SDValue &getOffset() const {
return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
}
+ const SDValue &getMask() const {
+ return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
+ }
+
+ /// Return the addressing mode for this load or store:
+ /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
+ ISD::MemIndexedMode getAddressingMode() const {
+ return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
+ }
+
+ /// Return true if this is a pre/post inc/dec load/store.
+ bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
+
+ /// Return true if this is NOT a pre/post inc/dec load/store.
+ bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD ||
@@ -2298,9 +2355,9 @@
friend class SelectionDAG;
MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT,
- MachineMemOperand *MMO)
- : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) {
+ ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
+ bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
+ : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
LoadSDNodeBits.ExtTy = ETy;
LoadSDNodeBits.IsExpanding = IsExpanding;
}
@@ -2310,8 +2367,9 @@
}
const SDValue &getBasePtr() const { return getOperand(1); }
- const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getPassThru() const { return getOperand(3); }
+ const SDValue &getOffset() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
+ const SDValue &getPassThru() const { return getOperand(4); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
@@ -2326,9 +2384,9 @@
friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- bool isTrunc, bool isCompressing, EVT MemVT,
- MachineMemOperand *MMO)
- : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
+ ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
+ EVT MemVT, MachineMemOperand *MMO)
+ : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
StoreSDNodeBits.IsCompressing = isCompressing;
}
@@ -2344,9 +2402,10 @@
/// memory at base_addr.
bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
- const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getValue() const { return getOperand(1); }
const SDValue &getBasePtr() const { return getOperand(2); }
- const SDValue &getMask() const { return getOperand(3); }
+ const SDValue &getOffset() const { return getOperand(3); }
+ const SDValue &getMask() const { return getOperand(4); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;
@@ -2362,8 +2421,27 @@
MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
- MachineMemOperand *MMO)
- : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = IndexType;
+ assert(getIndexType() == IndexType && "Value truncated");
+ }
+
+ /// How is Index applied to BasePtr when computing addresses.
+ ISD::MemIndexType getIndexType() const {
+ return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
+ }
+ void setIndexType(ISD::MemIndexType IndexType) {
+ LSBaseSDNodeBits.AddressingMode = IndexType;
+ }
+ bool isIndexScaled() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::UNSIGNED_SCALED);
+ }
+ bool isIndexSigned() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::SIGNED_UNSCALED);
+ }
// In both nodes, the address is Op1 and the mask is Op2:
// MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
@@ -2387,11 +2465,19 @@
friend class SelectionDAG;
MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- EVT MemVT, MachineMemOperand *MMO)
- : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}
+ EVT MemVT, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
+ : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {
+ LoadSDNodeBits.ExtTy = ETy;
+ }
const SDValue &getPassThru() const { return getOperand(1); }
+ ISD::LoadExtType getExtensionType() const {
+ return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
+ }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER;
}
@@ -2404,8 +2490,17 @@
friend class SelectionDAG;
MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- EVT MemVT, MachineMemOperand *MMO)
- : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {}
+ EVT MemVT, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType, bool IsTrunc)
+ : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {
+ StoreSDNodeBits.IsTruncating = IsTrunc;
+ }
+
+ /// Return true if the op does a truncation before store.
+ /// For integers this is the same as doing a TRUNCATE and storing the result.
+ /// For floats, it is the same as doing an FP_ROUND and storing the result.
+ bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
const SDValue &getValue() const { return getOperand(1); }
@@ -2478,6 +2573,22 @@
}
};
+/// An SDNode that records if a register contains a value that is guaranteed to
+/// be aligned accordingly.
+class AssertAlignSDNode : public SDNode {
+ Align Alignment;
+
+public:
+ AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
+ : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
+
+ Align getAlign() const { return Alignment; }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::AssertAlign;
+ }
+};
+
class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
SDNode, ptrdiff_t> {
const SDNode *Node;
@@ -2539,7 +2650,8 @@
/// with 4 and 8 byte pointer alignment, respectively.
using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
BlockAddressSDNode,
- GlobalAddressSDNode>;
+ GlobalAddressSDNode,
+ PseudoProbeSDNode>;
/// The SDNode class with the greatest alignment requirement.
using MostAlignedSDNode = GlobalAddressSDNode;
@@ -2622,6 +2734,16 @@
SDValue LHS, SDValue RHS,
std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
bool AllowUndefs = false, bool AllowTypeMismatch = false);
+
+ /// Returns true if the specified value is the overflow result from one
+ /// of the overflow intrinsic nodes.
+ inline bool isOverflowIntrOpRes(SDValue Op) {
+ unsigned Opc = Op.getOpcode();
+ return (Op.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
+ }
+
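For example, given N = ISD::SADDO producing {i32, i1}, the overflow bit is
value number 1:

    SDValue Ovf(N, 1);
    if (ISD::isOverflowIntrOpRes(Ovf))
      ; // fold a setcc/branch on the overflow bit directly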
} // end namespace ISD
} // end namespace llvm
diff --git a/linux-x64/clang/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/linux-x64/clang/include/llvm/CodeGen/SelectionDAGTargetInfo.h
index 7c9f57b..78f6fc6 100644
--- a/linux-x64/clang/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -51,7 +51,7 @@
virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1,
SDValue Op2, SDValue Op3,
- unsigned Align, bool isVolatile,
+ Align Alignment, bool isVolatile,
bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
@@ -66,7 +66,7 @@
/// lowering strategy should be used.
virtual SDValue EmitTargetCodeForMemmove(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1,
- SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile,
+ SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
return SDValue();
}
@@ -80,12 +80,12 @@
virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1,
SDValue Op2, SDValue Op3,
- unsigned Align, bool isVolatile,
+ Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
return SDValue();
}
- /// Emit target-specific code that performs a memcmp, in cases where that is
+ /// Emit target-specific code that performs a memcmp/bcmp, in cases where that is
/// faster than a libcall. The first returned SDValue is the result of the
/// memcmp and the second is the chain. Both SDValues can be null if a normal
/// libcall should be used.
@@ -147,11 +147,24 @@
return std::make_pair(SDValue(), SDValue());
}
+ virtual SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Addr,
+ SDValue Size,
+ MachinePointerInfo DstPtrInfo,
+ bool ZeroData) const {
+ return SDValue();
+ }
+
// Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
// than FMUL and ADD is delegated to the machine combiner.
virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const {
return false;
}
+
+ // Return true if the DAG Combiner should disable generic combines.
+ virtual bool disableGenericCombines(CodeGenOpt::Level OptLevel) const {
+ return false;
+ }
};
} // end namespace llvm
diff --git a/linux-x64/clang/include/llvm/CodeGen/SlotIndexes.h b/linux-x64/clang/include/llvm/CodeGen/SlotIndexes.h
index 10ab4cc..b2133de 100644
--- a/linux-x64/clang/include/llvm/CodeGen/SlotIndexes.h
+++ b/linux-x64/clang/include/llvm/CodeGen/SlotIndexes.h
@@ -347,14 +347,9 @@
public:
static char ID;
- SlotIndexes() : MachineFunctionPass(ID) {
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- }
+ SlotIndexes();
- ~SlotIndexes() override {
- // The indexList's nodes are all allocated in the BumpPtrAllocator.
- indexList.clearAndLeakNodesUnsafely();
- }
+ ~SlotIndexes() override;
void getAnalysisUsage(AnalysisUsage &au) const override;
void releaseMemory() override;
@@ -387,13 +382,15 @@
}
/// Returns the base index for the given instruction.
- SlotIndex getInstructionIndex(const MachineInstr &MI) const {
+ SlotIndex getInstructionIndex(const MachineInstr &MI,
+ bool IgnoreBundle = false) const {
// Instructions inside a bundle have the same number as the bundle itself.
auto BundleStart = getBundleStart(MI.getIterator());
auto BundleEnd = getBundleEnd(MI.getIterator());
// Use the first non-debug instruction in the bundle to get SlotIndex.
const MachineInstr &BundleNonDebug =
- *skipDebugInstructionsForward(BundleStart, BundleEnd);
+ IgnoreBundle ? MI
+ : *skipDebugInstructionsForward(BundleStart, BundleEnd);
assert(!BundleNonDebug.isDebugInstr() &&
"Could not use a debug instruction to query mi2iMap.");
Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleNonDebug);
@@ -578,7 +575,11 @@
/// Removes machine instruction (bundle) \p MI from the mapping.
/// This should be called before MachineInstr::eraseFromParent() is used to
/// remove a whole bundle or an unbundled instruction.
- void removeMachineInstrFromMaps(MachineInstr &MI);
+ /// If \p AllowBundled is set then this can be used on a bundled
+ /// instruction; however, this exists to support handleMoveIntoBundle,
+ /// and in general removeSingleMachineInstrFromMaps should be used instead.
+ void removeMachineInstrFromMaps(MachineInstr &MI,
+ bool AllowBundled = false);
/// Removes a single machine instruction \p MI from the mapping.
/// This should be called before MachineInstr::eraseFromBundle() is used to
@@ -603,30 +604,27 @@
}
/// Add the given MachineBasicBlock into the maps.
+ /// If it contains any instructions then they must already be in the maps.
+ /// This is used after a block has been split by moving some suffix of its
+ /// instructions into a newly created block.
void insertMBBInMaps(MachineBasicBlock *mbb) {
- MachineFunction::iterator nextMBB =
- std::next(MachineFunction::iterator(mbb));
+ assert(mbb != &mbb->getParent()->front() &&
+ "Can't insert a new block at the beginning of a function.");
+ auto prevMBB = std::prev(MachineFunction::iterator(mbb));
- IndexListEntry *startEntry = nullptr;
- IndexListEntry *endEntry = nullptr;
- IndexList::iterator newItr;
- if (nextMBB == mbb->getParent()->end()) {
- startEntry = &indexList.back();
- endEntry = createEntry(nullptr, 0);
- newItr = indexList.insertAfter(startEntry->getIterator(), endEntry);
- } else {
- startEntry = createEntry(nullptr, 0);
- endEntry = getMBBStartIdx(&*nextMBB).listEntry();
- newItr = indexList.insert(endEntry->getIterator(), startEntry);
- }
+ // Create a new entry to be used for the start of mbb and the end of
+ // prevMBB.
+ IndexListEntry *startEntry = createEntry(nullptr, 0);
+ IndexListEntry *endEntry = getMBBEndIdx(&*prevMBB).listEntry();
+ IndexListEntry *insEntry =
+ mbb->empty() ? endEntry
+ : getInstructionIndex(mbb->front()).listEntry();
+ IndexList::iterator newItr =
+ indexList.insert(insEntry->getIterator(), startEntry);
SlotIndex startIdx(startEntry, SlotIndex::Slot_Block);
SlotIndex endIdx(endEntry, SlotIndex::Slot_Block);
- MachineFunction::iterator prevMBB(mbb);
- assert(prevMBB != mbb->getParent()->end() &&
- "Can't insert a new block at the beginning of a function.");
- --prevMBB;
MBBRanges[prevMBB->getNumber()].second = startIdx;
assert(unsigned(mbb->getNumber()) == MBBRanges.size() &&
diff --git a/linux-x64/clang/include/llvm/CodeGen/Spiller.h b/linux-x64/clang/include/llvm/CodeGen/Spiller.h
new file mode 100644
index 0000000..a693d64
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/Spiller.h
@@ -0,0 +1,42 @@
+//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SPILLER_H
+#define LLVM_LIB_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+class LiveRangeEdit;
+class MachineFunction;
+class MachineFunctionPass;
+class VirtRegMap;
+
+/// Spiller interface.
+///
+/// Implementations are utility classes which insert spill or remat code on
+/// demand.
+class Spiller {
+ virtual void anchor();
+
+public:
+ virtual ~Spiller() = 0;
+
+ /// spill - Spill the LRE.getParent() live interval.
+ virtual void spill(LiveRangeEdit &LRE) = 0;
+
+ virtual void postOptimization() {}
+};
+
+/// Create and return a spiller that will insert spill code directly instead
+/// of deferring through VirtRegMap.
+Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SPILLER_H
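A register-allocator-side sketch (Pass, MF, VRM, and LRE are assumed to come
from the surrounding allocator; this mirrors the intended use rather than any
one in-tree caller):

    std::unique_ptr<Spiller> S(createInlineSpiller(Pass, MF, VRM));
    S->spill(LRE);         // spill the LRE.getParent() live interval
    S->postOptimization(); // optional cleanup hook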
diff --git a/linux-x64/clang/include/llvm/CodeGen/StableHashing.h b/linux-x64/clang/include/llvm/CodeGen/StableHashing.h
new file mode 100644
index 0000000..caf27e1
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/StableHashing.h
@@ -0,0 +1,112 @@
+//===- llvm/CodeGen/StableHashing.h - Utilities for stable hashing -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides types and functions for computing and combining stable
+// hashes. Stable hashes can be useful for hashing across different modules,
+// processes, or compiler runs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_STABLEHASHING_H
+#define LLVM_CODEGEN_STABLEHASHING_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+/// An opaque object representing a stable hash code. It can be serialized,
+/// deserialized, and is stable across processes and executions.
+using stable_hash = uint64_t;
+
+// Implementation details
+namespace hashing {
+namespace detail {
+
+// Stable hashes are based on the 64-bit FNV-1a hash:
+// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
+
+const uint64_t FNV_PRIME_64 = 1099511628211u;
+const uint64_t FNV_OFFSET_64 = 14695981039346656037u;
+
+inline void stable_hash_append(stable_hash &Hash, const char Value) {
+ Hash = Hash ^ (Value & 0xFF);
+ Hash = Hash * FNV_PRIME_64;
+}
+
+inline void stable_hash_append(stable_hash &Hash, stable_hash Value) {
+ for (unsigned I = 0; I < 8; ++I) {
+ stable_hash_append(Hash, static_cast<char>(Value));
+ Value >>= 8;
+ }
+}
+
+} // namespace detail
+} // namespace hashing
+
+inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ hashing::detail::stable_hash_append(Hash, A);
+ hashing::detail::stable_hash_append(Hash, B);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
+ stable_hash C) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ hashing::detail::stable_hash_append(Hash, A);
+ hashing::detail::stable_hash_append(Hash, B);
+ hashing::detail::stable_hash_append(Hash, C);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
+ stable_hash C, stable_hash D) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ hashing::detail::stable_hash_append(Hash, A);
+ hashing::detail::stable_hash_append(Hash, B);
+ hashing::detail::stable_hash_append(Hash, C);
+ hashing::detail::stable_hash_append(Hash, D);
+ return Hash;
+}
+
+/// Compute a stable_hash for a sequence of values.
+///
+/// This hashes a sequence of values. It produces the same stable_hash as
+/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized
+/// sequences and is significantly faster given pointers and types which
+/// can be hashed as a sequence of bytes.
+template <typename InputIteratorT>
+stable_hash stable_hash_combine_range(InputIteratorT First,
+ InputIteratorT Last) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ for (auto I = First; I != Last; ++I)
+ hashing::detail::stable_hash_append(Hash, *I);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ for (size_t I = 0; I < C; ++I)
+ hashing::detail::stable_hash_append(Hash, P[I]);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine_string(const StringRef &S) {
+ return stable_hash_combine_range(S.begin(), S.end());
+}
+
+inline stable_hash stable_hash_combine_string(const char *C) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ while (*C)
+ hashing::detail::stable_hash_append(Hash, *(C++));
+ return Hash;
+}
+
+} // namespace llvm
+
+#endif
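A small sketch of the API above; identical inputs hash identically across
processes and runs, unlike llvm::hash_value:

    #include "llvm/CodeGen/StableHashing.h"

    llvm::stable_hash HA = llvm::stable_hash_combine_string("foo");
    llvm::stable_hash HB = llvm::stable_hash_combine(HA, 42u);
    llvm::stable_hash Parts[] = {HA, HB};
    llvm::stable_hash HC = llvm::stable_hash_combine_array(Parts, 2);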
diff --git a/linux-x64/clang/include/llvm/CodeGen/StackMaps.h b/linux-x64/clang/include/llvm/CodeGen/StackMaps.h
index d7d88de..928d7cc 100644
--- a/linux-x64/clang/include/llvm/CodeGen/StackMaps.h
+++ b/linux-x64/clang/include/llvm/CodeGen/StackMaps.h
@@ -148,46 +148,104 @@
/// <StackMaps::ConstantOp>, <calling convention>,
/// <StackMaps::ConstantOp>, <statepoint flags>,
/// <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
-/// <gc base/derived pairs...> <gc allocas...>
-/// Note that the last two sets of arguments are not currently length
-/// prefixed.
+/// <StackMaps::ConstantOp>, <num gc pointer args>, [gc pointer args...],
+/// <StackMaps::ConstantOp>, <num gc allocas>, [gc allocas args...],
+/// <StackMaps::ConstantOp>, <num entries in gc map>, [base/derived pairs]
+/// base/derived pairs in gc map are logical indices into <gc pointer args>
+/// section.
+/// All gc pointers assigned to VRegs produce new value (in form of MI Def
+/// operand) and are tied to it.
class StatepointOpers {
// TODO: we should change the STATEPOINT representation so that CC and
// Flags should be part of meta operands, with args and deopt operands, and
// gc operands all prefixed by their length and a type code. This would be
// much more consistent.
-public:
- // These values are aboolute offsets into the operands of the statepoint
+
+ // These values are absolute offsets into the operands of the statepoint
// instruction.
enum { IDPos, NBytesPos, NCallArgsPos, CallTargetPos, MetaEnd };
- // These values are relative offests from the start of the statepoint meta
+ // These values are relative offsets from the start of the statepoint meta
// arguments (i.e. the end of the call arguments).
enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 };
- explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {}
+public:
+ explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {
+ NumDefs = MI->getNumDefs();
+ }
+
+ /// Get index of statepoint ID operand.
+ unsigned getIDPos() const { return NumDefs + IDPos; }
+
+ /// Get index of Num Patch Bytes operand.
+ unsigned getNBytesPos() const { return NumDefs + NBytesPos; }
+
+ /// Get index of Num Call Arguments operand.
+ unsigned getNCallArgsPos() const { return NumDefs + NCallArgsPos; }
/// Get starting index of non call related arguments
/// (calling convention, statepoint flags, vm state and gc state).
unsigned getVarIdx() const {
- return MI->getOperand(NCallArgsPos).getImm() + MetaEnd;
+ return MI->getOperand(NumDefs + NCallArgsPos).getImm() + MetaEnd + NumDefs;
+ }
+
+ /// Get index of Calling Convention operand.
+ unsigned getCCIdx() const { return getVarIdx() + CCOffset; }
+
+ /// Get index of Flags operand.
+ unsigned getFlagsIdx() const { return getVarIdx() + FlagsOffset; }
+
+ /// Get index of Number Deopt Arguments operand.
+ unsigned getNumDeoptArgsIdx() const {
+ return getVarIdx() + NumDeoptOperandsOffset;
}
/// Return the ID for the given statepoint.
- uint64_t getID() const { return MI->getOperand(IDPos).getImm(); }
+ uint64_t getID() const { return MI->getOperand(NumDefs + IDPos).getImm(); }
/// Return the number of patchable bytes the given statepoint should emit.
uint32_t getNumPatchBytes() const {
- return MI->getOperand(NBytesPos).getImm();
+ return MI->getOperand(NumDefs + NBytesPos).getImm();
}
- /// Returns the target of the underlying call.
+ /// Return the target of the underlying call.
const MachineOperand &getCallTarget() const {
- return MI->getOperand(CallTargetPos);
+ return MI->getOperand(NumDefs + CallTargetPos);
}
+ /// Return the calling convention.
+ CallingConv::ID getCallingConv() const {
+ return MI->getOperand(getCCIdx()).getImm();
+ }
+
+ /// Return the statepoint flags.
+ uint64_t getFlags() const { return MI->getOperand(getFlagsIdx()).getImm(); }
+
+ uint64_t getNumDeoptArgs() const {
+ return MI->getOperand(getNumDeoptArgsIdx()).getImm();
+ }
+
+ /// Get index of number of gc map entries.
+ unsigned getNumGcMapEntriesIdx();
+
+ /// Get index of number of gc allocas.
+ unsigned getNumAllocaIdx();
+
+ /// Get index of number of GC pointers.
+ unsigned getNumGCPtrIdx();
+
+ /// Get index of first GC pointer operand, or -1 if there are none.
+ int getFirstGCPtrIdx();
+
+ /// Get vector of base/derived pairs from statepoint.
+ /// Elements are indices into GC Pointer operand list (logical).
+ /// Returns number of elements in GCMap.
+ unsigned
+ getGCPointerMap(SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap);
+
private:
const MachineInstr *MI;
+ unsigned NumDefs;
};
class StackMaps {
@@ -229,6 +287,10 @@
StackMaps(AsmPrinter &AP);
+ /// Get index of next meta operand.
+ /// Similar to parseOperand, but does not actually parse the operand's meaning.
+ static unsigned getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx);
+
void reset() {
CSInfos.clear();
ConstPool.clear();
@@ -266,13 +328,16 @@
/// Generate a stackmap record for a stackmap instruction.
///
/// MI must be a raw STACKMAP, not a PATCHPOINT.
- void recordStackMap(const MachineInstr &MI);
+ void recordStackMap(const MCSymbol &L,
+ const MachineInstr &MI);
/// Generate a stackmap record for a patchpoint instruction.
- void recordPatchPoint(const MachineInstr &MI);
+ void recordPatchPoint(const MCSymbol &L,
+ const MachineInstr &MI);
/// Generate a stackmap record for a statepoint instruction.
- void recordStatepoint(const MachineInstr &MI);
+ void recordStatepoint(const MCSymbol &L,
+ const MachineInstr &MI);
/// If there is any stack map data, create a stack map section and serialize
/// the map info into it. This clears the stack map data structures
@@ -298,6 +363,13 @@
MachineInstr::const_mop_iterator MOE, LocationVec &Locs,
LiveOutVec &LiveOuts) const;
+ /// Specialized parser of statepoint operands.
+ /// They do not directly correspond to StackMap record entries.
+ void parseStatepointOpers(const MachineInstr &MI,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ LocationVec &Locations, LiveOutVec &LiveOuts);
+
/// Create a live-out register record for the given register @p Reg.
LiveOutReg createLiveOutReg(unsigned Reg,
const TargetRegisterInfo *TRI) const;
@@ -306,12 +378,15 @@
/// registers that need to be recorded in the stackmap.
LiveOutVec parseRegisterLiveOutMask(const uint32_t *Mask) const;
- /// This should be called by the MC lowering code _immediately_ before
- /// lowering the MI to an MCInst. It records where the operands for the
- /// instruction are stored, and outputs a label to record the offset of
- /// the call from the start of the text section. In special cases (e.g. AnyReg
- /// calling convention) the return register is also recorded if requested.
- void recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
+ /// Record the locations of the operands of the provided instruction in a
+ /// record keyed by the provided label. For instructions w/AnyReg calling
+ /// convention the return register is also recorded if requested. For
+ /// STACKMAP and PATCHPOINT, the label is expected to immediately *precede*
+ /// lowering of the MI to MCInsts. For STATEPOINT, it is expected to
+ /// immediately *follow*. It's not clear this difference was intentional,
+ /// but it exists today.
+ void recordStackMapOpers(const MCSymbol &L,
+ const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult = false);
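
A consequence of the layout documented above: every fixed statepoint offset is now shifted by the number of tied defs on the STATEPOINT instruction. A small arithmetic sketch of the index computations, using hypothetical operand counts rather than a real MachineInstr:

    #include <cassert>

    // Fixed offsets copied from the StatepointOpers enums above.
    enum { IDPos, NBytesPos, NCallArgsPos, CallTargetPos, MetaEnd };
    enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 };

    // getVarIdx(): index of the first operand past the call arguments.
    unsigned varIdx(unsigned NumDefs, unsigned NumCallArgs) {
      return NumCallArgs + MetaEnd + NumDefs;
    }

    int main() {
      // Hypothetical statepoint: 2 GC pointers lowered to VReg defs,
      // 3 call arguments.
      unsigned NumDefs = 2, NumCallArgs = 3;
      assert(varIdx(NumDefs, NumCallArgs) == 9);
      assert(varIdx(NumDefs, NumCallArgs) + CCOffset == 10);    // getCCIdx()
      assert(varIdx(NumDefs, NumCallArgs) + FlagsOffset == 12); // getFlagsIdx()
      return 0;
    }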
diff --git a/linux-x64/clang/include/llvm/CodeGen/StackProtector.h b/linux-x64/clang/include/llvm/CodeGen/StackProtector.h
index 2bdf442..f6513e8 100644
--- a/linux-x64/clang/include/llvm/CodeGen/StackProtector.h
+++ b/linux-x64/clang/include/llvm/CodeGen/StackProtector.h
@@ -61,6 +61,12 @@
/// protection when -fstack-protection is used.
unsigned SSPBufferSize = 0;
+ /// VisitedPHIs - The set of PHI nodes visited when determining
+ /// if a variable's reference has been taken. This set
+ /// is maintained to ensure we don't visit the same PHI node multiple
+ /// times.
+ SmallPtrSet<const PHINode *, 16> VisitedPHIs;
+
// A prologue is generated.
bool HasPrologue = false;
@@ -89,7 +95,7 @@
bool InStruct = false) const;
/// Check whether a stack allocation has its address taken.
- bool HasAddressTaken(const Instruction *AI);
+ bool HasAddressTaken(const Instruction *AI, uint64_t AllocSize);
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
@@ -98,9 +104,7 @@
public:
static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(ID), SSPBufferSize(8) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
+ StackProtector();
void getAnalysisUsage(AnalysisUsage &AU) const override;
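
The VisitedPHIs set exists because PHI nodes can form cycles (a PHI feeding, through other PHIs, back into itself), so a naive recursive address-taken walk would never terminate. A generic sketch of the pattern, assuming a toy Node in place of PHINode:

    #include <unordered_set>
    #include <vector>

    struct Node {
      bool AddressTaken = false;
      std::vector<const Node *> Users;
    };

    // Recursive walk over users; the Visited set makes it terminate on
    // cyclic graphs, mirroring the role of VisitedPHIs in HasAddressTaken.
    bool hasAddressTaken(const Node *N,
                         std::unordered_set<const Node *> &Visited) {
      if (!Visited.insert(N).second)
        return false; // already visited: break the cycle
      if (N->AddressTaken)
        return true;
      for (const Node *U : N->Users)
        if (hasAddressTaken(U, Visited))
          return true;
      return false;
    }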
diff --git a/linux-x64/clang/include/llvm/CodeGen/SwitchLoweringUtils.h b/linux-x64/clang/include/llvm/CodeGen/SwitchLoweringUtils.h
index 62134dc..51f1d7d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/linux-x64/clang/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -10,15 +10,21 @@
#define LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/BranchProbability.h"
+#include <vector>
namespace llvm {
+class BlockFrequencyInfo;
+class ConstantInt;
class FunctionLoweringInfo;
class MachineBasicBlock;
+class ProfileSummaryInfo;
+class TargetLowering;
+class TargetMachine;
namespace SwitchCG {
@@ -212,16 +218,17 @@
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
+ bool OmitRangeCheck;
BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
: First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr) {}
+ Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {}
};
-/// Return the range of value within a range.
+/// Return the range of values within a range.
uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
unsigned Last);
@@ -263,7 +270,8 @@
std::vector<BitTestBlock> BitTestCases;
void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB);
+ MachineBasicBlock *DefaultMBB,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
unsigned Last, const SwitchInst *SI,
@@ -294,4 +302,3 @@
} // namespace llvm
#endif // LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
-
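
getJumpTableRange feeds the usual density heuristic: a jump table pays off only when the case values are dense within the value range they span. A toy version over plain integers (the real code works on case clusters with APInt bounds; the density gate shown is a hypothetical illustration, not the in-tree threshold):

    #include <cstdint>
    #include <vector>

    struct Cluster { uint64_t Low, High; }; // stand-in for a case cluster

    // Range of values spanned by the clusters in [First, Last], inclusive.
    uint64_t jumpTableRange(const std::vector<Cluster> &Clusters,
                            unsigned First, unsigned Last) {
      return Clusters[Last].High - Clusters[First].Low + 1;
    }

    // Hypothetical density gate: build a table only if at least DensityPct
    // percent of the spanned range is covered by real cases.
    bool worthJumpTable(uint64_t NumCases, uint64_t Range,
                        unsigned DensityPct) {
      return NumCases * 100 >= Range * DensityPct;
    }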
diff --git a/linux-x64/clang/include/llvm/CodeGen/TailDuplicator.h b/linux-x64/clang/include/llvm/CodeGen/TailDuplicator.h
index 358798d..6862bb2 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TailDuplicator.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TailDuplicator.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <utility>
#include <vector>
@@ -25,11 +26,13 @@
namespace llvm {
class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineInstr;
class MachineModuleInfo;
class MachineRegisterInfo;
+class ProfileSummaryInfo;
class TargetRegisterInfo;
/// Utility class to perform tail duplication.
@@ -40,18 +43,20 @@
const MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
MachineFunction *MF;
+ MBFIWrapper *MBFI;
+ ProfileSummaryInfo *PSI;
bool PreRegAlloc;
bool LayoutMode;
unsigned TailDupSize;
// A list of virtual registers for which to update SSA form.
- SmallVector<unsigned, 16> SSAUpdateVRs;
+ SmallVector<Register, 16> SSAUpdateVRs;
// For each virtual register in SSAUpdateVals keep a list of source virtual
// registers.
- using AvailableValsTy = std::vector<std::pair<MachineBasicBlock *, unsigned>>;
+ using AvailableValsTy = std::vector<std::pair<MachineBasicBlock *, Register>>;
- DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
+ DenseMap<Register, AvailableValsTy> SSAUpdateVals;
public:
/// Prepare to run on a specific machine function.
@@ -65,6 +70,8 @@
/// default implies using the command line value TailDupSize.
void initMF(MachineFunction &MF, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPI,
+ MBFIWrapper *MBFI,
+ ProfileSummaryInfo *PSI,
bool LayoutMode, unsigned TailDupSize = 0);
bool tailDuplicateBlocks();
@@ -80,41 +87,44 @@
/// of predecessors that received a copy of \p MBB.
/// If \p RemovalCallback is non-null, it will be called before MBB is
/// deleted.
+ /// If \p CandidatePtr is not null, duplicate into these blocks only.
bool tailDuplicateAndUpdate(
bool IsSimple, MachineBasicBlock *MBB,
MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds = nullptr,
- function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
+ function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr,
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr);
private:
using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
- void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ void addSSAUpdateEntry(Register OrigReg, Register NewReg,
MachineBasicBlock *BB);
void processPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB,
- DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
- SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
- const DenseSet<unsigned> &UsedByPhi, bool Remove);
+ DenseMap<Register, RegSubRegPair> &LocalVRMap,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &Copies,
+ const DenseSet<Register> &UsedByPhi, bool Remove);
void duplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB,
- DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
- const DenseSet<unsigned> &UsedByPhi);
+ DenseMap<Register, RegSubRegPair> &LocalVRMap,
+ const DenseSet<Register> &UsedByPhi);
void updateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallSetVector<MachineBasicBlock *, 8> &Succs);
bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
bool duplicateSimpleBB(MachineBasicBlock *TailBB,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- const DenseSet<unsigned> &RegsUsedByPhi,
+ const DenseSet<Register> &RegsUsedByPhi,
SmallVectorImpl<MachineInstr *> &Copies);
bool tailDuplicate(bool IsSimple,
MachineBasicBlock *TailBB,
MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- SmallVectorImpl<MachineInstr *> &Copies);
+ SmallVectorImpl<MachineInstr *> &Copies,
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr);
void appendCopies(MachineBasicBlock *MBB,
- SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
SmallVectorImpl<MachineInstr *> &Copies);
void removeDeadBlock(
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetCallingConv.h b/linux-x64/clang/include/llvm/CodeGen/TargetCallingConv.h
index aebeeec..df974b4 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetCallingConv.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetCallingConv.h
@@ -14,6 +14,7 @@
#define LLVM_CODEGEN_TARGETCALLINGCONV_H
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -30,35 +31,44 @@
unsigned IsInReg : 1; ///< Passed in register
unsigned IsSRet : 1; ///< Hidden struct-ret ptr
unsigned IsByVal : 1; ///< Struct passed by value
+ unsigned IsByRef : 1; ///< Passed in memory
unsigned IsNest : 1; ///< Nested fn static chain
unsigned IsReturned : 1; ///< Always returned
unsigned IsSplit : 1;
unsigned IsInAlloca : 1; ///< Passed with inalloca
+ unsigned IsPreallocated : 1; ///< ByVal without the copy
unsigned IsSplitEnd : 1; ///< Last part of a split
unsigned IsSwiftSelf : 1; ///< Swift self parameter
unsigned IsSwiftError : 1; ///< Swift error parameter
+ unsigned IsCFGuardTarget : 1; ///< Control Flow Guard target
unsigned IsHva : 1; ///< HVA field for
unsigned IsHvaStart : 1; ///< HVA structure start
unsigned IsSecArgPass : 1; ///< Second argument
- unsigned ByValAlign : 4; ///< Log 2 of byval alignment
+ unsigned ByValOrByRefAlign : 4; ///< Log 2 of byval/byref alignment
unsigned OrigAlign : 5; ///< Log 2 of original alignment
unsigned IsInConsecutiveRegsLast : 1;
unsigned IsInConsecutiveRegs : 1;
unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
unsigned IsPointer : 1;
- unsigned ByValSize; ///< Byval struct size
+ unsigned ByValOrByRefSize; ///< Byval or byref struct size
unsigned PointerAddrSpace; ///< Address space of pointer argument
+ /// Set the alignment used by byref or byval parameters.
+ void setAlignImpl(Align A) {
+ ByValOrByRefAlign = encode(A);
+ assert(getNonZeroByValAlign() == A && "bitfield overflow");
+ }
+
public:
ArgFlagsTy()
- : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
- IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
- IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0),
- IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
- IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
- IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
+ : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsByRef(0),
+ IsNest(0), IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
+ IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
+ IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValOrByRefAlign(0),
+ OrigAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
+ IsCopyElisionCandidate(0), IsPointer(0), ByValOrByRefSize(0),
PointerAddrSpace(0) {
static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
}
@@ -78,15 +88,24 @@
bool isByVal() const { return IsByVal; }
void setByVal() { IsByVal = 1; }
+ bool isByRef() const { return IsByRef; }
+ void setByRef() { IsByRef = 1; }
+
bool isInAlloca() const { return IsInAlloca; }
void setInAlloca() { IsInAlloca = 1; }
+ bool isPreallocated() const { return IsPreallocated; }
+ void setPreallocated() { IsPreallocated = 1; }
+
bool isSwiftSelf() const { return IsSwiftSelf; }
void setSwiftSelf() { IsSwiftSelf = 1; }
bool isSwiftError() const { return IsSwiftError; }
void setSwiftError() { IsSwiftError = 1; }
+ bool isCFGuardTarget() const { return IsCFGuardTarget; }
+ void setCFGuardTarget() { IsCFGuardTarget = 1; }
+
bool isHva() const { return IsHva; }
void setHva() { IsHva = 1; }
@@ -103,10 +122,12 @@
void setReturned() { IsReturned = 1; }
bool isInConsecutiveRegs() const { return IsInConsecutiveRegs; }
- void setInConsecutiveRegs() { IsInConsecutiveRegs = 1; }
+ void setInConsecutiveRegs(bool Flag = true) { IsInConsecutiveRegs = Flag; }
bool isInConsecutiveRegsLast() const { return IsInConsecutiveRegsLast; }
- void setInConsecutiveRegsLast() { IsInConsecutiveRegsLast = 1; }
+ void setInConsecutiveRegsLast(bool Flag = true) {
+ IsInConsecutiveRegsLast = Flag;
+ }
bool isSplit() const { return IsSplit; }
void setSplit() { IsSplit = 1; }
@@ -120,20 +141,56 @@
bool isPointer() const { return IsPointer; }
void setPointer() { IsPointer = 1; }
- unsigned getByValAlign() const { return (1U << ByValAlign) / 2; }
- void setByValAlign(unsigned A) {
- ByValAlign = Log2_32(A) + 1;
- assert(getByValAlign() == A && "bitfield overflow");
+ LLVM_ATTRIBUTE_DEPRECATED(unsigned getByValAlign() const,
+ "Use getNonZeroByValAlign() instead") {
+ MaybeAlign A = decodeMaybeAlign(ByValOrByRefAlign);
+ return A ? A->value() : 0;
+ }
+ Align getNonZeroByValAlign() const {
+ MaybeAlign A = decodeMaybeAlign(ByValOrByRefAlign);
+ assert(A && "ByValAlign must be defined");
+ return *A;
+ }
+ void setByValAlign(Align A) {
+ assert(isByVal() && !isByRef());
+ setAlignImpl(A);
}
- unsigned getOrigAlign() const { return (1U << OrigAlign) / 2; }
- void setOrigAlign(unsigned A) {
- OrigAlign = Log2_32(A) + 1;
- assert(getOrigAlign() == A && "bitfield overflow");
+ void setByRefAlign(Align A) {
+ assert(!isByVal() && isByRef());
+ setAlignImpl(A);
}
- unsigned getByValSize() const { return ByValSize; }
- void setByValSize(unsigned S) { ByValSize = S; }
+ LLVM_ATTRIBUTE_DEPRECATED(unsigned getOrigAlign() const,
+ "Use getNonZeroOrigAlign() instead") {
+ MaybeAlign A = decodeMaybeAlign(OrigAlign);
+ return A ? A->value() : 0;
+ }
+ Align getNonZeroOrigAlign() const {
+ return decodeMaybeAlign(OrigAlign).valueOrOne();
+ }
+ void setOrigAlign(Align A) {
+ OrigAlign = encode(A);
+ assert(getNonZeroOrigAlign() == A && "bitfield overflow");
+ }
+
+ unsigned getByValSize() const {
+ assert(isByVal() && !isByRef());
+ return ByValOrByRefSize;
+ }
+ void setByValSize(unsigned S) {
+ assert(isByVal() && !isByRef());
+ ByValOrByRefSize = S;
+ }
+
+ unsigned getByRefSize() const {
+ assert(!isByVal() && isByRef());
+ return ByValOrByRefSize;
+ }
+ void setByRefSize(unsigned S) {
+ assert(!isByVal() && isByRef());
+ ByValOrByRefSize = S;
+ }
unsigned getPointerAddrSpace() const { return PointerAddrSpace; }
void setPointerAddrSpace(unsigned AS) { PointerAddrSpace = AS; }
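
The 4-bit ByValOrByRefAlign and 5-bit OrigAlign fields keep storing alignments as biased log2 values, just as the removed Log2_32(A) + 1 setters did: 0 means "no alignment recorded" and N encodes 1 << (N - 1) bytes. A standalone sketch of that encoding (encode/decodeMaybeAlign are assumed to behave this way):

    #include <cassert>
    #include <cstdint>

    // 0 = unset; N = alignment of 1 << (N - 1) bytes. This is the
    // Log2_32(A) + 1 scheme visible in the removed setByValAlign.
    unsigned encodeAlign(uint64_t A) {
      assert(A && (A & (A - 1)) == 0 && "alignment must be a power of two");
      unsigned Log = 0;
      while ((1ULL << Log) < A)
        ++Log;
      return Log + 1;
    }

    uint64_t decodeAlign(unsigned Encoded) {
      return Encoded ? (1ULL << (Encoded - 1)) : 0;
    }

    int main() {
      assert(encodeAlign(16) == 5); // fits the 4-bit field (max 1 << 14)
      assert(decodeAlign(encodeAlign(16)) == 16);
      assert(decodeAlign(0) == 0);  // unset
      return 0;
    }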
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetFrameLowering.h b/linux-x64/clang/include/llvm/CodeGen/TargetFrameLowering.h
index 878c9ff..792452f 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetFrameLowering.h
@@ -14,8 +14,7 @@
#define LLVM_CODEGEN_TARGETFRAMELOWERING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/ADT/StringSwitch.h"
-#include <utility>
+#include "llvm/Support/TypeSize.h"
#include <vector>
namespace llvm {
@@ -28,6 +27,7 @@
enum Value {
Default = 0,
SGPRSpill = 1,
+ ScalableVector = 2,
NoAlloc = 255
};
}
@@ -51,17 +51,32 @@
unsigned Reg;
int Offset; // Offset relative to stack pointer on function entry.
};
+
+ struct DwarfFrameBase {
+ // The frame base may be either a register (the default), the CFA,
+ // or a WebAssembly-specific location description.
+ enum FrameBaseKind { Register, CFA, WasmFrameBase } Kind;
+ struct WasmFrameBase {
+ unsigned Kind; // Wasm local, global, or value stack
+ unsigned Index;
+ };
+ union {
+ unsigned Reg;
+ struct WasmFrameBase WasmLoc;
+ } Location;
+ };
+
private:
StackDirection StackDir;
- unsigned StackAlignment;
- unsigned TransientStackAlignment;
+ Align StackAlignment;
+ Align TransientStackAlignment;
int LocalAreaOffset;
bool StackRealignable;
public:
- TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
- unsigned TransAl = 1, bool StackReal = true)
- : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
- LocalAreaOffset(LAO), StackRealignable(StackReal) {}
+ TargetFrameLowering(StackDirection D, Align StackAl, int LAO,
+ Align TransAl = Align(1), bool StackReal = true)
+ : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
+ LocalAreaOffset(LAO), StackRealignable(StackReal) {}
virtual ~TargetFrameLowering();
@@ -76,7 +91,12 @@
/// stack pointer must be aligned on entry to a function. Typically, this
/// is the largest alignment for any data object in the target.
///
- unsigned getStackAlignment() const { return StackAlignment; }
+ unsigned getStackAlignment() const { return StackAlignment.value(); }
+ /// getStackAlign - This method returns the number of bytes to which the
+ /// stack pointer must be aligned on entry to a function. Typically, this
+ /// is the largest alignment for any data object in the target.
+ ///
+ Align getStackAlign() const { return StackAlignment; }
/// alignSPAdjust - This method aligns the stack adjustment to the correct
/// alignment.
@@ -94,9 +114,15 @@
/// which the stack pointer must be aligned at all times, even between
/// calls.
///
- unsigned getTransientStackAlignment() const {
- return TransientStackAlignment;
+ LLVM_ATTRIBUTE_DEPRECATED(unsigned getTransientStackAlignment() const,
+ "Use getTransientStackAlign instead") {
+ return TransientStackAlignment.value();
}
+ /// getTransientStackAlignment - This method returns the number of bytes to
+ /// which the stack pointer must be aligned at all times, even between
+ /// calls.
+ ///
+ Align getTransientStackAlign() const { return TransientStackAlignment; }
/// isStackRealignable - This method returns whether the stack can be
/// realigned.
@@ -109,6 +135,12 @@
/// was called).
virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const;
+ /// This method returns whether or not it is safe for an object with the
+ /// given stack id to be bundled into the local area.
+ virtual bool isStackIdSafeForLocalArea(unsigned StackId) const {
+ return true;
+ }
+
/// getOffsetOfLocalArea - This method returns the offset of the local area
/// from the stack pointer on entrance to a function.
///
@@ -177,6 +209,17 @@
virtual void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const = 0;
+ /// With basic block sections, emit callee saved frame moves for basic blocks
+ /// that are in a different section.
+ virtual void
+ emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {}
+
+ virtual void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ bool IsPrologue) const {}
+
/// Replace a StackProbe stub (if any) with the actual probe code inline
virtual void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const {}
@@ -197,7 +240,7 @@
/// storeRegToStackSlot(). Returns false otherwise.
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const {
return false;
}
@@ -208,10 +251,11 @@
/// If it returns true, and any of the registers in CSI is not restored,
/// it sets the corresponding Restored flag in CSI to false.
/// Returns false otherwise.
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const {
return false;
}
@@ -254,8 +298,8 @@
/// getFrameIndexReference - This method should return the base register
/// and offset used to reference a frame index location. The offset is
/// returned directly, and the base register is returned via FrameReg.
- virtual int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const;
+ virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const;
/// Same as \c getFrameIndexReference, except that the stack pointer (as
/// opposed to the frame pointer) will be the preferred value for \p
@@ -263,9 +307,10 @@
/// use offsets from RSP. If \p IgnoreSPUpdates is true, the returned
/// offset is only guaranteed to be valid with respect to the value of SP at
/// the end of the prologue.
- virtual int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
- unsigned &FrameReg,
- bool IgnoreSPUpdates) const {
+ virtual StackOffset
+ getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ Register &FrameReg,
+ bool IgnoreSPUpdates) const {
// Always safe to dispatch to getFrameIndexReference.
return getFrameIndexReference(MF, FI, FrameReg);
}
@@ -273,14 +318,19 @@
/// getNonLocalFrameIndexReference - This method returns the offset used to
/// reference a frame index location. The offset can be from either FP/BP/SP
/// based on which base register is returned by llvm.localaddress.
- virtual int getNonLocalFrameIndexReference(const MachineFunction &MF,
- int FI) const {
+ virtual StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const {
// By default, dispatch to getFrameIndexReference. Interested targets can
// override this.
- unsigned FrameReg;
+ Register FrameReg;
return getFrameIndexReference(MF, FI, FrameReg);
}
+ /// Returns the callee-saved registers as computed by determineCalleeSaves
+ /// in the BitVector \p SavedRegs.
+ virtual void getCalleeSaves(const MachineFunction &MF,
+ BitVector &SavedRegs) const;
+
/// This method determines which of the registers reported by
/// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
/// The default implementation populates the \p SavedRegs bitset with
@@ -288,6 +338,9 @@
/// this function to save additional registers.
/// This method also sets up the register scavenger ensuring there is a free
/// register or a frameindex available.
+ /// This method should not be called by any passes outside of PEI, because
+ /// it may change state passed in by \p MF and \p RS. The preferred
+ /// interface outside PEI is getCalleeSaves.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const;
@@ -300,6 +353,13 @@
RegScavenger *RS = nullptr) const {
}
+ /// processFunctionBeforeFrameIndicesReplaced - This method is called
+ /// immediately before MO_FrameIndex operands are eliminated, but after the
+ /// frame is finalized. This method is optional.
+ virtual void
+ processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const {}
+
virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const {
report_fatal_error("WinEH not implemented for this target");
}
@@ -354,6 +414,11 @@
return true;
}
+ /// Returns the StackID that scalable vectors should be associated with.
+ virtual TargetStackID::Value getStackIDForScalableVectors() const {
+ return TargetStackID::Default;
+ }
+
virtual bool isSupportedStackID(TargetStackID::Value ID) const {
switch (ID) {
default:
@@ -366,15 +431,10 @@
/// Check if given function is safe for not having callee saved registers.
/// This is used when interprocedural register allocation is enabled.
- static bool isSafeForNoCSROpt(const Function &F) {
- if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
- !F.hasFnAttribute(Attribute::NoRecurse))
- return false;
- // Function should not be optimized as tail call.
- for (const User *U : F.users())
- if (auto CS = ImmutableCallSite(U))
- if (CS.isTailCall())
- return false;
+ static bool isSafeForNoCSROpt(const Function &F);
+
+ /// Check if the no-CSR optimisation is profitable for the given function.
+ virtual bool isProfitableForNoCSROpt(const Function &F) const {
return true;
}
@@ -384,7 +444,11 @@
/// Return initial CFA register value i.e. the one valid at the beginning of
/// the function (before any stack operations).
- virtual unsigned getInitialCFARegister(const MachineFunction &MF) const;
+ virtual Register getInitialCFARegister(const MachineFunction &MF) const;
+
+ /// Return the frame base information to be encoded in the DWARF subprogram
+ /// debug info.
+ virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
};
} // End llvm namespace
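
DwarfFrameBase is a tagged union, so consumers must switch on Kind before reading Location. A minimal sketch of that dispatch (the strings merely name the DWARF strategy a debug-info emitter might pick; the actual emission code is not shown here):

    // Mirrors the DwarfFrameBase shape introduced above.
    struct DwarfFrameBase {
      enum FrameBaseKind { Register, CFA, WasmFrameBase } Kind;
      struct WasmFB { unsigned Kind; unsigned Index; };
      union { unsigned Reg; WasmFB WasmLoc; } Location;
    };

    // Hypothetical consumer choosing a DW_AT_frame_base strategy per kind.
    const char *frameBaseStrategy(const DwarfFrameBase &FB) {
      switch (FB.Kind) {
      case DwarfFrameBase::Register:
        return "frame register (e.g. DW_OP_reg*)";
      case DwarfFrameBase::CFA:
        return "DW_OP_call_frame_cfa";
      case DwarfFrameBase::WasmFrameBase:
        return "WebAssembly location (local/global/stack kind + index)";
      }
      return "unknown";
    }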
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetInstrInfo.h b/linux-x64/clang/include/llvm/CodeGen/TargetInstrInfo.h
index 314bb72..36afdef 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetInstrInfo.h
@@ -17,15 +17,15 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/None.h"
-#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOutliner.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/BranchProbability.h"
@@ -38,10 +38,12 @@
namespace llvm {
+class AAResults;
class DFAPacketizer;
class InstrItineraryData;
class LiveIntervals;
class LiveVariables;
+class MachineLoop;
class MachineMemOperand;
class MachineRegisterInfo;
class MCAsmInfo;
@@ -49,6 +51,7 @@
struct MCSchedModel;
class Module;
class ScheduleDAG;
+class ScheduleDAGMI;
class ScheduleHazardRecognizer;
class SDNode;
class SelectionDAG;
@@ -60,7 +63,32 @@
template <class T> class SmallVectorImpl;
-using ParamLoadedValue = std::pair<const MachineOperand*, DIExpression*>;
+using ParamLoadedValue = std::pair<MachineOperand, DIExpression*>;
+
+struct DestSourcePair {
+ const MachineOperand *Destination;
+ const MachineOperand *Source;
+
+ DestSourcePair(const MachineOperand &Dest, const MachineOperand &Src)
+ : Destination(&Dest), Source(&Src) {}
+};
+
+/// Used to describe a register and immediate addition.
+struct RegImmPair {
+ Register Reg;
+ int64_t Imm;
+
+ RegImmPair(Register Reg, int64_t Imm) : Reg(Reg), Imm(Imm) {}
+};
+
+/// Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
+/// It holds the register values, the scale value and the displacement.
+struct ExtAddrMode {
+ Register BaseReg;
+ Register ScaledReg;
+ int64_t Scale;
+ int64_t Displacement;
+};
//---------------------------------------------------------------------------
///
@@ -94,7 +122,7 @@
/// registers so that the instructions result is independent of the place
/// in the function.
bool isTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA = nullptr) const {
+ AAResults *AA = nullptr) const {
return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
(MI.getDesc().isRematerializable() &&
(isReallyTriviallyReMaterializable(MI, AA) ||
@@ -110,7 +138,7 @@
/// not always available.
/// Requirements must be check as stated in isTriviallyReMaterializable() .
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const {
+ AAResults *AA) const {
return false;
}
@@ -153,7 +181,7 @@
/// this function does target-independent tests to determine if the
/// instruction is really trivially rematerializable.
bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI,
- AliasAnalysis *AA) const;
+ AAResults *AA) const;
public:
/// These methods return the opcode of the frame setup/destroy instructions
@@ -215,8 +243,8 @@
/// destination. e.g. X86::MOVSX64rr32. If this returns true, then it's
/// expected the pre-extension value is available as a subreg of the result
/// register. This also returns the sub-register index in SubIdx.
- virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &DstReg, unsigned &SubIdx) const {
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
+ Register &DstReg, unsigned &SubIdx) const {
return false;
}
@@ -321,6 +349,12 @@
unsigned &Size, unsigned &Offset,
const MachineFunction &MF) const;
+ /// Return true if the given instruction is terminator that is unspillable,
+ /// according to isUnspillableTerminatorImpl.
+ bool isUnspillableTerminator(const MachineInstr *MI) const {
+ return MI->isTerminator() && isUnspillableTerminatorImpl(MI);
+ }
+
/// Returns the size in bytes of the specified MachineInstr, or ~0U
/// when this function is not implemented by a target.
virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const {
@@ -348,7 +382,7 @@
/// DestReg:SubIdx. Any existing subreg index is preserved or composed with
/// SubIdx.
virtual void reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned DestReg,
+ MachineBasicBlock::iterator MI, Register DestReg,
unsigned SubIdx, const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const;
@@ -421,16 +455,17 @@
/// findCommutedOpIndices(MI, Op1, Op2);
/// can be interpreted as a query asking to find an operand that would be
/// commutable with the operand#1.
- virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ virtual bool findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const;
/// A pair composed of a register and a sub-register index.
/// Used to give some type checking when modeling Reg:SubReg.
struct RegSubRegPair {
- unsigned Reg;
+ Register Reg;
unsigned SubReg;
- RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0)
+ RegSubRegPair(Register Reg = Register(), unsigned SubReg = 0)
: Reg(Reg), SubReg(SubReg) {}
bool operator==(const RegSubRegPair& P) const {
@@ -447,7 +482,7 @@
struct RegSubRegPairAndIdx : RegSubRegPair {
unsigned SubIdx;
- RegSubRegPairAndIdx(unsigned Reg = 0, unsigned SubReg = 0,
+ RegSubRegPairAndIdx(Register Reg = Register(), unsigned SubReg = 0,
unsigned SubIdx = 0)
: RegSubRegPair(Reg, SubReg), SubIdx(SubIdx) {}
};
@@ -623,7 +658,7 @@
}
/// Remove the branching code at the end of the specific MBB.
- /// This is only invoked in cases where AnalyzeBranch returns success. It
+ /// This is only invoked in cases where analyzeBranch returns success. It
/// returns the number of instructions that were removed.
/// If \p BytesRemoved is non-null, report the change in code size from the
/// removed instructions.
@@ -633,13 +668,13 @@
}
/// Insert branch code into the end of the specified MachineBasicBlock. The
- /// operands to this method are the same as those returned by AnalyzeBranch.
- /// This is only invoked in cases where AnalyzeBranch returns success. It
+ /// operands to this method are the same as those returned by analyzeBranch.
+ /// This is only invoked in cases where analyzeBranch returns success. It
/// returns the number of instructions inserted. If \p BytesAdded is non-null,
/// report the change in code size from the added instructions.
///
/// It is also invoked by tail merging to add unconditional branches in
- /// cases where AnalyzeBranch doesn't apply because there was no original
+ /// cases where analyzeBranch doesn't apply because there was no original
/// branch to analyze. At least this much must be implemented, else tail
/// merging needs to be disabled.
///
@@ -661,7 +696,51 @@
BytesAdded);
}
- /// Analyze the loop code, return true if it cannot be understoo. Upon
+ /// Object returned by analyzeLoopForPipelining. Allows software pipelining
+ /// implementations to query attributes of the loop being pipelined and to
+ /// apply target-specific updates to the loop once pipelining is complete.
+ class PipelinerLoopInfo {
+ public:
+ virtual ~PipelinerLoopInfo();
+ /// Return true if the given instruction should not be pipelined and should
+ /// be ignored. An example could be a loop comparison, or induction variable
+ /// update with no users being pipelined.
+ virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0;
+
+ /// Create a condition to determine if the trip count of the loop is greater
+ /// than TC.
+ ///
+ /// If the trip count is statically known to be greater than TC, return
+ /// true. If the trip count is statically known to be not greater than TC,
+ /// return false. Otherwise return nullopt and fill out Cond with the test
+ /// condition.
+ virtual Optional<bool>
+ createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond) = 0;
+
+ /// Modify the loop such that the trip count is
+ /// OriginalTC + TripCountAdjust.
+ virtual void adjustTripCount(int TripCountAdjust) = 0;
+
+ /// Called when the loop's preheader has been modified to NewPreheader.
+ virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0;
+
+ /// Called when the loop is being removed. Any instructions in the preheader
+ /// should be removed.
+ ///
+ /// Once this function is called, no other functions on this object are
+ /// valid; the loop has been removed.
+ virtual void disposed() = 0;
+ };
+
+ /// Analyze loop L, which must be a single-basic-block loop, and if the
+ /// conditions can be understood well enough, produce a PipelinerLoopInfo
+ /// object.
+ virtual std::unique_ptr<PipelinerLoopInfo>
+ analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ return nullptr;
+ }
+
+ /// Analyze the loop code, return true if it cannot be understood. Upon
/// success, this function returns false and returns information about the
/// induction variable and compare instruction used at the end.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
@@ -708,7 +787,7 @@
/// Second variant of isProfitableToIfCvt. This one
/// checks for the case where two basic blocks from true and false path
- /// of a if-then-else (diamond) are predicated on mutally exclusive
+ /// of a if-then-else (diamond) are predicated on mutually exclusive
/// predicates, where the probability of the true path being taken is given
/// by Probability, and Confidence is a measure of our confidence that it
/// will be properly predicted.
@@ -732,6 +811,19 @@
return false;
}
+ /// Return the increase in code size needed to predicate a contiguous run of
+ /// NumInsts instructions.
+ virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+ unsigned NumInsts) const {
+ return 0;
+ }
+
+ /// Return an estimate for the code size reduction (in bytes) which will be
+ /// caused by removing the given branch instruction during if-conversion.
+ virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const {
+ return getInstSizeInBytes(MI);
+ }
+
/// Return true if it's profitable to unpredicate
/// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
/// exclusive predicates.
@@ -759,16 +851,18 @@
/// Some x86 implementations have 2-cycle cmov instructions.
///
/// @param MBB Block where select instruction would be inserted.
- /// @param Cond Condition returned by AnalyzeBranch.
+ /// @param Cond Condition returned by analyzeBranch.
+ /// @param DstReg Virtual dest register that the result should write to.
/// @param TrueReg Virtual register to select when Cond is true.
/// @param FalseReg Virtual register to select when Cond is false.
/// @param CondCycles Latency from Cond+Branch to select output.
/// @param TrueCycles Latency from TrueReg to select output.
/// @param FalseCycles Latency from FalseReg to select output.
virtual bool canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg, int &CondCycles,
- int &TrueCycles, int &FalseCycles) const {
+ ArrayRef<MachineOperand> Cond, Register DstReg,
+ Register TrueReg, Register FalseReg,
+ int &CondCycles, int &TrueCycles,
+ int &FalseCycles) const {
return false;
}
@@ -776,7 +870,7 @@
/// DstReg when Cond is true, and FalseReg to DstReg when Cond is false.
///
/// This function can only be called after canInsertSelect() returned true.
- /// The condition in Cond comes from AnalyzeBranch, and it can be assumed
+ /// The condition in Cond comes from analyzeBranch, and it can be assumed
/// that the same flags or registers required by Cond are available at the
/// insertion point.
///
@@ -784,13 +878,13 @@
/// @param I Insertion point.
/// @param DL Source location for debugging.
/// @param DstReg Virtual register to be defined by select instruction.
- /// @param Cond Condition as computed by AnalyzeBranch.
+ /// @param Cond Condition as computed by analyzeBranch.
/// @param TrueReg Virtual register to copy when Cond is true.
/// @param FalseReg Virtual register to copy when Cons is false.
virtual void insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned DstReg, ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg) const {
+ Register DstReg, ArrayRef<MachineOperand> Cond,
+ Register TrueReg, Register FalseReg) const {
llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!");
}
@@ -852,36 +946,62 @@
/// large registers. See for example the ARM target.
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
- unsigned DestReg, unsigned SrcReg,
+ MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const {
llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!");
}
protected:
- /// Target-dependent implemenation for IsCopyInstr.
+ /// Target-dependent implementation for IsCopyInstr.
/// If the specific machine instruction is an instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- virtual bool isCopyInstrImpl(const MachineInstr &MI,
- const MachineOperand *&Source,
- const MachineOperand *&Destination) const {
+ /// value from one register to another register return destination and source
+ /// registers as machine operands.
+ virtual Optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const {
+ return None;
+ }
+
+ /// Return true if the given terminator MI is not expected to spill. This
+ /// sets the live interval as not spillable and adjusts phi node lowering to
+ /// not introduce copies after the terminator. Use with care, these are
+ /// currently used for hardware loop intrinsics in very controlled situations,
+ /// created prior to register allocation in loops that only have single PHI
+ /// users for the terminator's value. They may run out of registers if not used
+ /// carefully.
+ virtual bool isUnspillableTerminatorImpl(const MachineInstr *MI) const {
return false;
}
public:
/// If the specific machine instruction is an instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- /// For COPY-instruction the method naturally returns true, for all other
- /// instructions the method calls target-dependent implementation.
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const {
+ /// value from one register to another register return destination and source
+ /// registers as machine operands.
+ /// For a COPY instruction, the method naturally returns destination and
+ /// source registers as machine operands; for all other instructions the
+ /// method calls the target-dependent implementation.
+ Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI) const {
if (MI.isCopy()) {
- Destination = &MI.getOperand(0);
- Source = &MI.getOperand(1);
- return true;
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
- return isCopyInstrImpl(MI, Source, Destination);
+ return isCopyInstrImpl(MI);
+ }
+
+ /// If the specific machine instruction is an instruction that adds an
+ /// immediate value and a physical register, and stores the result in
+ /// the given physical register \c Reg, return a pair of the source
+ /// register and the offset which has been added.
+ virtual Optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
+ Register Reg) const {
+ return None;
+ }
+
+ /// Returns true if MI is an instruction that defines Reg to have a constant
+ /// value and the value is recorded in ImmVal. The ImmVal is a result that
+ /// should be interpreted modulo the size of Reg.
+ virtual bool getConstValDefinedInReg(const MachineInstr &MI,
+ const Register Reg,
+ int64_t &ImmVal) const {
+ return false;
}
/// Store the specified register of the given register class to the specified
@@ -890,7 +1010,7 @@
/// is true, the register operand is the last use and must be marked kill.
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
llvm_unreachable("Target didn't implement "
@@ -902,7 +1022,7 @@
/// machine basic block before the specified machine instruction.
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
llvm_unreachable("Target didn't implement "
@@ -957,9 +1077,23 @@
/// faster sequence.
/// \param Root - Instruction that could be combined with one of its operands
/// \param Patterns - Vector of possible combination patterns
- virtual bool getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const;
+ virtual bool
+ getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const;
+
+ /// Return true if target supports reassociation of instructions in machine
+ /// combiner pass to reduce register pressure for a given BB.
+ virtual bool
+ shouldReduceRegisterPressure(MachineBasicBlock *MBB,
+ RegisterClassInfo *RegClassInfo) const {
+ return false;
+ }
+
+ /// Fix up the placeholder we may add in genAlternativeCodeSequence().
+ virtual void
+ finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const {}
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
@@ -1009,12 +1143,17 @@
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
+ /// The limit on resource length extension we accept in MachineCombiner Pass.
+ virtual int getExtendResourceLenLimit() const { return 0; }
+
/// This is an architecture-specific helper function of reassociateOps.
/// Set special operand attributes for new instructions after reassociation.
virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {}
+ virtual void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const {}
+
/// Return true when a target supports MachineCombiner.
virtual bool useMachineCombiner() const { return false; }
@@ -1152,11 +1291,24 @@
}
/// Get the base operand and byte offset of an instruction that reads/writes
- /// memory.
- virtual bool getMemOperandWithOffset(const MachineInstr &MI,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+ /// memory. This is a convenience function for callers that are only prepared
+ /// to handle a single base operand.
+ bool getMemOperandWithOffset(const MachineInstr &MI,
+ const MachineOperand *&BaseOp, int64_t &Offset,
+ bool &OffsetIsScalable,
+ const TargetRegisterInfo *TRI) const;
+
+ /// Get zero or more base operands and the byte offset of an instruction that
+ /// reads/writes memory. Note that there may be zero base operands if the
+ /// instruction accesses a constant address.
+ /// It returns false if MI does not read/write memory.
+ /// It returns false if base operands and offset could not be determined.
+ /// It is not guaranteed to always recognize base operands and offsets in all
+ /// cases.
+ virtual bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
return false;
}
@@ -1169,6 +1321,27 @@
return false;
}
+ /// Target-dependent implementation to get the values constituting the address
+ /// of the MachineInstr that is accessing memory. These values are returned as
+ /// a struct ExtAddrMode which contains all relevant information to make up the
+ /// address.
+ virtual Optional<ExtAddrMode>
+ getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const {
+ return None;
+ }
+
+ /// Returns true if MI's Def is NullValueReg, and the MI
+ /// does not change the Zero value. i.e. cases such as rax = shr rax, X where
+ /// NullValueReg = rax. Note that if the NullValueReg is non-zero, this
+ /// function can return true even if it becomes zero. Specifically, cases
+ /// such as NullValueReg = shl NullValueReg, 63.
+ virtual bool preservesZeroValueInReg(const MachineInstr *MI,
+ const Register NullValueReg,
+ const TargetRegisterInfo *TRI) const {
+ return false;
+ }
+
/// If the instruction is an increment of a constant value, return the amount.
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const {
return false;
@@ -1180,9 +1353,15 @@
/// or
/// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
/// to TargetPassConfig::createMachineScheduler() to have an effect.
- virtual bool shouldClusterMemOps(const MachineOperand &BaseOp1,
- const MachineOperand &BaseOp2,
- unsigned NumLoads) const {
+ ///
+ /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
+ /// \p NumLoads is the number of loads that will be in the cluster if this
+ /// hook returns true.
+ /// \p NumBytes is the number of bytes that will be loaded from all the
+ /// clustered loads if this hook returns true.
+ virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ unsigned NumLoads, unsigned NumBytes) const {
llvm_unreachable("target did not implement shouldClusterMemOps()");
}
@@ -1197,6 +1376,11 @@
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+ /// Insert noops into the instruction stream at the specified point.
+ virtual void insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const;
+
/// Return the noop instruction to use for a noop.
virtual void getNoop(MCInst &NopInst) const;
@@ -1206,9 +1390,14 @@
/// Returns true if the instruction is already predicated.
virtual bool isPredicated(const MachineInstr &MI) const { return false; }
+ // Returns a MIRPrinter comment for this machine operand.
+ virtual std::string
+ createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op,
+ unsigned OpIdx, const TargetRegisterInfo *TRI) const;
+
/// Returns true if the instruction is a
/// terminator instruction that has not been predicated.
- virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const;
+ bool isUnpredicatedTerminator(const MachineInstr &MI) const;
/// Returns true if MI is an unconditional tail call.
virtual bool isUnconditionalTailCall(const MachineInstr &MI) const {
@@ -1243,8 +1432,13 @@
/// If the specified instruction defines any predicate
/// or condition code register(s) used for predication, returns true as well
/// as the definition predicate(s) by reference.
- virtual bool DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const {
+ /// SkipDead should be set to false at any point that dead
+ /// predicate instructions should be considered as being defined.
+ /// A dead predicate instruction is one that is guaranteed to be removed
+ /// after a call to PredicateInstruction.
+ virtual bool ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
return false;
}
@@ -1283,7 +1477,7 @@
/// scheduling the machine instructions before register allocation.
virtual ScheduleHazardRecognizer *
CreateTargetMIHazardRecognizer(const InstrItineraryData *,
- const ScheduleDAG *DAG) const;
+ const ScheduleDAGMI *DAG) const;
/// Allocate and return a hazard recognizer to use for this target when
/// scheduling the machine instructions after register allocation.
@@ -1306,16 +1500,16 @@
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
- virtual bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask, int &Value) const {
+ virtual bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask, int &Value) const {
return false;
}
/// See if the comparison instruction can be converted
/// into something more efficient. E.g., on ARM most instructions can set the
/// flags register, obviating the need for a separate CMP.
- virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int Mask, int Value,
+ virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const {
return false;
}
@@ -1330,7 +1524,7 @@
/// the machine instruction generated due to folding.
virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI,
const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
+ Register &FoldAsLoadDefReg,
MachineInstr *&DefMI) const {
return nullptr;
}
@@ -1342,7 +1536,7 @@
/// block. The caller may assume that it will not be erased by this
/// function otherwise.
virtual bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const {
+ Register Reg, MachineRegisterInfo *MRI) const {
return false;
}
@@ -1515,7 +1709,7 @@
/// This hook works similarly to getPartialRegUpdateClearance, except that it
/// does not take an operand index. Instead sets \p OpNum to the index of the
/// unused register.
- virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
+ virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const {
// The default implementation returns 0 for no undef register dependency.
return 0;
@@ -1560,11 +1754,10 @@
/// function.
virtual bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const {
- assert((MIa.mayLoad() || MIa.mayStore()) &&
+ const MachineInstr &MIb) const {
+ assert(MIa.mayLoadOrStore() &&
"MIa must load from or modify a memory location");
- assert((MIb.mayLoad() || MIb.mayStore()) &&
+ assert(MIb.mayLoadOrStore() &&
"MIb must load from or modify a memory location");
return false;
}
@@ -1577,6 +1770,21 @@
return 5;
}
+ /// Return the maximal number of alias checks on memory operands. For
+ /// instructions with more than one memory operands, the alias check on a
+ /// single MachineInstr pair has quadratic overhead and results in
+ /// unacceptable performance in the worst case. The limit here clamps the
+ /// maximal number of checks performed. Usually, that's the product of memory
+ /// operand numbers from that pair of MachineInstr to be checked. For
+ /// instance, with two MachineInstrs with 4 and 5 memory operands
+ /// correspondingly, a total of 20 checks are required. With this limit set to
+ /// 16, their alias check is skipped. We choose to limit the product instead
+ /// of the individual instruction as targets may have special MachineInstrs
+ /// with a considerably high number of memory operands, such as `ldm` in ARM.
+ /// Setting this limit per MachineInstr would result in either too high
+ /// overhead or too rigid restriction.
+ virtual unsigned getMemOperandAACheckLimit() const { return 16; }
+
/// Return an array that contains the ids of the target indices (used for the
/// TargetIndex machine operand) and their names.
///
@@ -1638,6 +1846,28 @@
return false;
}
+ /// During PHI elimination, lets the target make necessary checks and
+ /// insert the copy to the PHI destination register in a target specific
+ /// manner.
+ virtual MachineInstr *createPHIDestinationCopy(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src, Register Dst) const {
+ return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
+ .addReg(Src);
+ }
+
+ /// During PHI elimination, lets the target make necessary checks and
+ /// insert the copy to the PHI destination register in a target specific
+ /// manner.
+ virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src,
+ unsigned SrcSubReg,
+ Register Dst) const {
+ return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
+ .addReg(Src, 0, SrcSubReg);
+ }
+
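As a hedged sketch of how a backend might override these hooks, here is a hypothetical MyTargetInstrInfo (not an in-tree class) emitting its own copy-like pseudo in place of the default COPY:

MachineInstr *MyTargetInstrInfo::createPHISourceCopy(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
    const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const {
  // Same shape as the default above, but with a target pseudo opcode
  // (MyTarget::PSEUDO_COPY is illustrative, not a real opcode).
  return BuildMI(MBB, InsPt, DL, get(MyTarget::PSEUDO_COPY), Dst)
      .addReg(Src, 0, SrcSubReg);
}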
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
/// information for a set of outlining candidates.
virtual outliner::OutlinedFunction getOutliningCandidateInfo(
@@ -1693,11 +1923,22 @@
return false;
}
- /// Produce RHS description of parameter's loading instruction \p MI.
- virtual Optional<ParamLoadedValue>
- describeLoadedValue(const MachineInstr &MI) const;
+ /// Produce the expression describing the \p MI loading a value into
+ /// the physical register \p Reg. This hook should only be used with
+ /// \p MIs belonging to VReg-less functions.
+ virtual Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const;
+
+ /// Return the MIR formatter to format/parse MIR operands. Targets can
+ /// override this virtual function to return a target-specific MIR formatter.
+ virtual const MIRFormatter *getMIRFormatter() const {
+ if (!Formatter.get())
+ Formatter = std::make_unique<MIRFormatter>();
+ return Formatter.get();
+ }
private:
+ mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
unsigned CatchRetOpcode;
unsigned ReturnOpcode;
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetLowering.h b/linux-x64/clang/include/llvm/CodeGen/TargetLowering.h
index d5cca60..305107c 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetLowering.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetLowering.h
@@ -28,7 +28,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -37,7 +36,6 @@
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -47,12 +45,11 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
-#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <climits>
@@ -72,8 +69,10 @@
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
+class GISelKnownBits;
class IntrinsicInst;
struct KnownBits;
+class LegacyDivergenceAnalysis;
class LLVMContext;
class MachineBasicBlock;
class MachineFunction;
@@ -84,9 +83,12 @@
class MCContext;
class MCExpr;
class Module;
-class TargetRegisterClass;
+class ProfileSummaryInfo;
class TargetLibraryInfo;
+class TargetMachine;
+class TargetRegisterClass;
class TargetRegisterInfo;
+class TargetTransformInfo;
class Value;
namespace Sched {
@@ -102,6 +104,85 @@
} // end namespace Sched
+// MemOp models a memory operation, either memset or memcpy/memmove.
+struct MemOp {
+private:
+ // Shared
+ uint64_t Size;
+ bool DstAlignCanChange; // true if the destination alignment can be changed
+ // to satisfy any constraint.
+ Align DstAlign; // Specified alignment of the memory operation.
+
+ bool AllowOverlap;
+ // memset only
+ bool IsMemset; // If set, this memory operation is a memset.
+ bool ZeroMemset; // If set, the memset fills the memory with zeros.
+ // memcpy only
+ bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
+ // constant so it does not need to be loaded.
+ Align SrcAlign; // Inferred alignment of the source or default value if the
+ // memory operation does not need to load the value.
+public:
+ static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
+ Align SrcAlign, bool IsVolatile,
+ bool MemcpyStrSrc = false) {
+ MemOp Op;
+ Op.Size = Size;
+ Op.DstAlignCanChange = DstAlignCanChange;
+ Op.DstAlign = DstAlign;
+ Op.AllowOverlap = !IsVolatile;
+ Op.IsMemset = false;
+ Op.ZeroMemset = false;
+ Op.MemcpyStrSrc = MemcpyStrSrc;
+ Op.SrcAlign = SrcAlign;
+ return Op;
+ }
+
+ static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
+ bool IsZeroMemset, bool IsVolatile) {
+ MemOp Op;
+ Op.Size = Size;
+ Op.DstAlignCanChange = DstAlignCanChange;
+ Op.DstAlign = DstAlign;
+ Op.AllowOverlap = !IsVolatile;
+ Op.IsMemset = true;
+ Op.ZeroMemset = IsZeroMemset;
+ Op.MemcpyStrSrc = false;
+ return Op;
+ }
+
+ uint64_t size() const { return Size; }
+ Align getDstAlign() const {
+ assert(!DstAlignCanChange);
+ return DstAlign;
+ }
+ bool isFixedDstAlign() const { return !DstAlignCanChange; }
+ bool allowOverlap() const { return AllowOverlap; }
+ bool isMemset() const { return IsMemset; }
+ bool isMemcpy() const { return !IsMemset; }
+ bool isMemcpyWithFixedDstAlign() const {
+ return isMemcpy() && !DstAlignCanChange;
+ }
+ bool isZeroMemset() const { return isMemset() && ZeroMemset; }
+ bool isMemcpyStrSrc() const {
+ assert(isMemcpy() && "Must be a memcpy");
+ return MemcpyStrSrc;
+ }
+ Align getSrcAlign() const {
+ assert(isMemcpy() && "Must be a memcpy");
+ return SrcAlign;
+ }
+ bool isSrcAligned(Align AlignCheck) const {
+ return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
+ }
+ bool isDstAligned(Align AlignCheck) const {
+ return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
+ }
+ bool isAligned(Align AlignCheck) const {
+ return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
+ }
+};
+
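A brief usage sketch of the MemOp descriptor above, assuming the surrounding headers from this patch; describeCopy and the concrete sizes are illustrative:

#include "llvm/CodeGen/TargetLowering.h"
#include <cassert>
using namespace llvm;

void describeCopy() {
  // A 64-byte volatile memcpy whose 16-byte alignments are fixed.
  MemOp Op = MemOp::Copy(/*Size=*/64, /*DstAlignCanChange=*/false,
                         /*DstAlign=*/Align(16), /*SrcAlign=*/Align(16),
                         /*IsVolatile=*/true);
  assert(Op.isMemcpy() && Op.isFixedDstAlign());
  assert(!Op.allowOverlap());      // volatile copies must not overlap
  assert(Op.isAligned(Align(16))); // both sides satisfy 16-byte alignment
}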
/// This base class for TargetLowering contains the SelectionDAG-independent
/// parts that can be used from the rest of CodeGen.
class TargetLoweringBase {
@@ -122,13 +203,20 @@
TypeLegal, // The target natively supports this type.
TypePromoteInteger, // Replace this integer with a larger one.
TypeExpandInteger, // Split this integer into two of half the size.
- TypeSoftenFloat, // Convert this float to a same size integer type,
- // if an operation is not supported in target HW.
+ TypeSoftenFloat, // Convert this float to a same size integer type.
TypeExpandFloat, // Split this float into two of half the size.
TypeScalarizeVector, // Replace this one-element vector with its element.
TypeSplitVector, // Split this vector into two of half the size.
TypeWidenVector, // This vector should be widened into a larger vector.
- TypePromoteFloat // Replace this float with a larger one.
+ TypePromoteFloat, // Replace this float with a larger one.
+ TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
+ TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
+ // While it is theoretically possible to
+ // legalize operations on scalable types with a
+ // loop that handles the vscale * #lanes of the
+ // vector, this is non-trivial at SelectionDAG
+ // level, so these types are better
+ // widened or promoted.
};
/// LegalizeKind holds the legalization kind that needs to happen to EVT
@@ -172,6 +260,13 @@
// or custom.
};
+ /// Enum that specifies when a float negation is beneficial.
+ enum class NegatibleCost {
+ Cheaper = 0, // Negated expression is cheaper.
+ Neutral = 1, // Negated expression has the same cost.
+ Expensive = 2 // Negated expression is more expensive.
+ };
+
class ArgListEntry {
public:
Value *Val = nullptr;
@@ -183,23 +278,24 @@
bool IsSRet : 1;
bool IsNest : 1;
bool IsByVal : 1;
+ bool IsByRef : 1;
bool IsInAlloca : 1;
+ bool IsPreallocated : 1;
bool IsReturned : 1;
bool IsSwiftSelf : 1;
bool IsSwiftError : 1;
- uint16_t Alignment = 0;
+ bool IsCFGuardTarget : 1;
+ MaybeAlign Alignment = None;
Type *ByValType = nullptr;
+ Type *PreallocatedType = nullptr;
ArgListEntry()
: IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
- IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
- IsSwiftSelf(false), IsSwiftError(false) {}
+ IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false),
+ IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
+ IsSwiftError(false), IsCFGuardTarget(false) {}
void setAttributes(const CallBase *Call, unsigned ArgIdx);
-
- void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
- return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
- }
};
using ArgListTy = std::vector<ArgListEntry>;
@@ -221,12 +317,16 @@
llvm_unreachable("Invalid content kind");
}
- /// NOTE: The TargetMachine owns TLOF.
explicit TargetLoweringBase(const TargetMachine &TM);
TargetLoweringBase(const TargetLoweringBase &) = delete;
TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
virtual ~TargetLoweringBase() = default;
+ /// Return true if the target supports strict floating-point operations.
+ bool isStrictFPEnabled() const {
+ return IsStrictFPEnabled;
+ }
+
protected:
/// Initialize all of the actions to default values.
void initActions();
@@ -256,6 +356,12 @@
return getPointerTy(DL, DL.getAllocaAddrSpace());
}
+ /// Return the type for code pointers, which is determined by the program
+ /// address space specified through the data layout.
+ MVT getProgramPointerTy(const DataLayout &DL) const {
+ return getPointerTy(DL, DL.getProgramAddressSpace());
+ }
+
/// Return the type for operands of fence.
/// TODO: Let fence operands be of i32 type and remove this.
virtual MVT getFenceOperandTy(const DataLayout &DL) const {
@@ -269,6 +375,13 @@
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
bool LegalTypes = true) const;
+ /// Return the preferred type to use for a shift opcode, given the shifted
+ /// amount type is \p ShiftValueTy.
+ LLVM_READONLY
+ virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const {
+ return ShiftValueTy;
+ }
+
/// Returns the type to be used for the index operand of:
/// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
/// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
@@ -276,6 +389,20 @@
return getPointerTy(DL);
}
+ /// This callback is used to inspect load/store instructions and add
+ /// target-specific MachineMemOperand flags to them. The default
+ /// implementation does nothing.
+ virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const {
+ return MachineMemOperand::MONone;
+ }
+
+ MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI,
+ const DataLayout &DL) const;
+ MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
+ const DataLayout &DL) const;
+ MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
+ const DataLayout &DL) const;
+
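A hedged sketch of a target override of getTargetMMOFlags; MyTargetLowering and the metadata kind are hypothetical, while MOTargetFlag1 is one of the flag slots MachineMemOperand reserves for targets:

MachineMemOperand::Flags
MyTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  // Tag accesses the frontend marked with a (hypothetical) metadata kind.
  if (I.getMetadata("mytarget.streaming"))
    return MachineMemOperand::MOTargetFlag1;
  return MachineMemOperand::MONone;
}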
virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
return true;
}
@@ -284,7 +411,7 @@
/// a constant pool load whose address depends on the select condition. The
/// parameter may be used to differentiate a select with FP compare from
/// integer compare.
- virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
return true;
}
@@ -300,7 +427,7 @@
virtual TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const {
// The default action for one element vectors is to scalarize
- if (VT.getVectorNumElements() == 1)
+ if (VT.getVectorElementCount().isScalar())
return TypeScalarizeVector;
// The default action for an odd-width vector is to widen.
if (!VT.isPow2VectorType())
@@ -309,6 +436,12 @@
return TypePromoteInteger;
}
+ // Return true if the half type should be passed around as i16, but promoted
+ // to float around arithmetic. The default behavior is to pass around as
+ // float and convert around loads/stores/bitcasts and other places where
+ // the size matters.
+ virtual bool softPromoteHalfType() const { return false; }
+
// There are two general methods for expanding a BUILD_VECTOR node:
// 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
// them together.
@@ -391,6 +524,10 @@
return PredictableSelectIsExpensive;
}
+ virtual bool fallBackToDAGISel(const Instruction &Inst) const {
+ return false;
+ }
+
/// If a branch or a select condition is skewed in one direction by more than
/// this factor, it is very likely to be predicted correctly.
virtual BranchProbability getPredictableBranchThreshold() const;
@@ -468,6 +605,16 @@
return false;
}
+ /// Return the maximum number of "x & (x - 1)" operations that can be done
+ /// instead of deferring to a custom CTPOP.
+ virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const {
+ return 1;
+ }
+
+ /// Return true if the instruction generated for an equality comparison is
+ /// folded with the instruction generated for a signed comparison.
+ virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
+
/// Return true if it is safe to transform an integer-domain bitwise operation
/// into the equivalent floating-point operation. This should be set to true
/// if the target has IEEE-754-compliant fabs/fneg operations for the input
@@ -539,6 +686,12 @@
return hasAndNotCompare(X);
}
+ /// Return true if the target has a bit-test instruction:
+ /// (X & (1 << Y)) ==/!= 0
+ /// This knowledge can be used to prevent breaking the pattern,
+ /// or creating it if it could be recognized.
+ virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
+
/// There are two ways to clear extreme bits (either low or high):
/// Mask: x & (-1 << y) (the instcombine canonical form)
/// Shifts: x >> y << y
@@ -571,6 +724,38 @@
return false;
}
+ /// Given the pattern
+ /// (X & (C l>>/<< Y)) ==/!= 0
+ /// return true if it should be transformed into:
+ /// ((X <</l>> Y) & C) ==/!= 0
+ /// WARNING: if 'X' is a constant, the fold may deadlock!
+ /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
+ /// here because it can end up being not linked in.
+ virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ if (hasBitTest(X, Y)) {
+ // One interesting pattern that we'd want to form is 'bit test':
+ // ((1 << Y) & C) ==/!= 0
+ // But we also need to be careful not to try to reverse that fold.
+
+ // Is this '1 << Y' ?
+ if (OldShiftOpcode == ISD::SHL && CC->isOne())
+ return false; // Keep the 'bit test' pattern.
+
+ // Will it be '1 << Y' after the transform ?
+ if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
+ return true; // Do form the 'bit test' pattern.
+ }
+
+ // If 'X' is a constant, and we transform, then we will immediately
+ // try to undo the fold, thus causing endless combine loop.
+ // So by default, let's assume everyone prefers the fold
+ // iff 'X' is not a constant.
+ return !XC;
+ }
+
/// These two forms are equivalent:
/// sub %y, (xor %x, -1)
/// add (add %x, 1), %y
@@ -798,9 +983,9 @@
PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
int offset = 0; // offset off of ptrVal
- unsigned size = 0; // the size of the memory location
+ uint64_t size = 0; // the size of the memory location
// (taken from memVT if zero)
- unsigned align = 1; // alignment
+ MaybeAlign align = Align(1); // alignment
MachineMemOperand::Flags flags = MachineMemOperand::MONone;
IntrinsicInfo() = default;
@@ -884,6 +1069,11 @@
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
break;
}
@@ -891,52 +1081,31 @@
return Supported ? Action : Expand;
}
+ // If Op is a strict floating-point operation, return the result
+ // of getOperationAction for the equivalent non-strict operation.
LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
unsigned EqOpc;
switch (Op) {
default: llvm_unreachable("Unexpected FP pseudo-opcode");
- case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
- case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
- case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
- case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
- case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
- case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
- case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
- case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
- case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
- case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
- case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
- case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
- case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
- case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
- case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
- case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
- case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
- case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
- case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
- case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
- case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
- case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
- case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
- case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
- case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
+#include "llvm/IR/ConstrainedOps.def"
}
- auto Action = getOperationAction(EqOpc, VT);
-
- // We don't currently handle Custom or Promote for strict FP pseudo-ops.
- // For now, we just expand for those cases.
- if (Action != Legal)
- Action = Expand;
-
- return Action;
+ return getOperationAction(EqOpc, VT);
}
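For reference, each DAG_INSTRUCTION entry in ConstrainedOps.def expands to a case of this shape (FADD shown), while the CMP_INSTRUCTION entries map every strict comparison to ISD::SETCC:

case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;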
/// Return true if the specified operation is legal on this target or can be
/// made legal with custom lowering. This is used to help guide high-level
- /// lowering decisions.
- bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
+ /// lowering decisions. LegalOnly is an optional convenience for code paths
+ /// traversed pre and post legalisation.
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ if (LegalOnly)
+ return isOperationLegal(Op, VT);
+
return (VT == MVT::Other || isTypeLegal(VT)) &&
(getOperationAction(Op, VT) == Legal ||
getOperationAction(Op, VT) == Custom);
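A short sketch of how a combine might use the new LegalOnly flag; canFormABS is an illustrative helper, with TLI and LegalOperations as in a typical DAGCombiner context:

static bool canFormABS(const TargetLowering &TLI, EVT VT,
                       bool LegalOperations) {
  // Pre-legalization this accepts Legal or Custom; post-legalization the
  // LegalOnly flag restricts it to operations that are truly Legal.
  return TLI.isOperationLegalOrCustom(ISD::ABS, VT,
                                      /*LegalOnly=*/LegalOperations);
}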
@@ -944,8 +1113,13 @@
/// Return true if the specified operation is legal on this target or can be
/// made legal using promotion. This is used to help guide high-level lowering
- /// decisions.
- bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
+ /// decisions. LegalOnly is an optional convenience for code paths traversed
+ /// pre and post legalisation.
+ bool isOperationLegalOrPromote(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ if (LegalOnly)
+ return isOperationLegal(Op, VT);
+
return (VT == MVT::Other || isTypeLegal(VT)) &&
(getOperationAction(Op, VT) == Legal ||
getOperationAction(Op, VT) == Promote);
@@ -953,8 +1127,13 @@
/// Return true if the specified operation is legal on this target or can be
/// made legal with custom lowering or using promotion. This is used to help
- /// guide high-level lowering decisions.
- bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
+ /// guide high-level lowering decisions. LegalOnly is an optional convenience
+ /// for code paths traversed pre and post legalisation.
+ bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ if (LegalOnly)
+ return isOperationLegal(Op, VT);
+
return (VT == MVT::Other || isTypeLegal(VT)) &&
(getOperationAction(Op, VT) == Legal ||
getOperationAction(Op, VT) == Custom ||
@@ -988,24 +1167,8 @@
/// Return true if lowering to a jump table is suitable for a set of case
/// clusters which may contain \p NumCases cases, \p Range range of values.
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
- uint64_t Range) const {
- // FIXME: This function check the maximum table size and density, but the
- // minimum size is not checked. It would be nice if the minimum size is
- // also combined within this function. Currently, the minimum size check is
- // performed in findJumpTable() in SelectionDAGBuiler and
- // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
- const bool OptForSize = SI->getParent()->getParent()->hasOptSize();
- const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
- const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
-
- // Check whether the number of cases is small enough and
- // the range is dense enough for a jump table.
- if ((OptForSize || Range <= MaxJumpTableSize) &&
- (NumCases * 100 >= Range * MinDensity)) {
- return true;
- }
- return false;
- }
+ uint64_t Range, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const;
/// Return true if lowering to a bit test is suitable for a set of case
/// clusters which contains \p NumDests unique destinations, \p Low and
@@ -1102,12 +1265,8 @@
/// Return how the indexed load should be treated: either it is legal, needs
/// to be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
- LegalizeAction
- getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
- assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
- "Table isn't big enough!");
- unsigned Ty = (unsigned)VT.SimpleTy;
- return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
+ LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_Load);
}
/// Return true if the specified indexed load is legal on this target.
@@ -1120,12 +1279,8 @@
/// Return how the indexed store should be treated: either it is legal, needs
/// to be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
- LegalizeAction
- getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
- assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
- "Table isn't big enough!");
- unsigned Ty = (unsigned)VT.SimpleTy;
- return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
+ LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_Store);
}
/// Return true if the specified indexed store is legal on this target.
@@ -1135,6 +1290,38 @@
getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
}
+ /// Return how the indexed masked load should be treated: either it is
+ /// legal, needs to be promoted to a larger size, needs to be expanded to
+ /// some other code sequence, or the target has a custom expander for it.
+ LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
+ }
+
+ /// Return true if the specified indexed masked load is legal on this target.
+ bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
+ return VT.isSimple() &&
+ (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
+ getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
+ }
+
+ /// Return how the indexed masked store should be treated: either it is
+ /// legal, needs to be promoted to a larger size, needs to be expanded to
+ /// some other code sequence, or the target has a custom expander for it.
+ LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
+ }
+
+ /// Return true if the specified indexed masked store is legal on this target.
+ bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
+ return VT.isSimple() &&
+ (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
+ getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
+ }
+
+ // Returns true if VT is a legal index type for masked gathers/scatters
+ // on this target
+ virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; }
+
/// Return how the condition code should be treated: either it is legal, needs
/// to be expanded to some other code sequence, or the target has a custom
/// expander for it.
@@ -1206,7 +1393,7 @@
EltTy = PointerTy.getTypeForEVT(Ty->getContext());
}
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
- VTy->getNumElements());
+ VTy->getElementCount());
}
return EVT::getEVT(Ty, AllowUnknown);
@@ -1224,7 +1411,7 @@
Elm = PointerTy.getTypeForEVT(Ty->getContext());
}
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
- VTy->getNumElements());
+ VTy->getElementCount());
}
return getValueType(DL, Ty, AllowUnknown);
@@ -1316,9 +1503,9 @@
/// Certain targets have context sensitive alignment requirements, where one
/// type has the alignment requirement of another type.
- virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
- DataLayout DL) const {
- return DL.getABITypeAlignment(ArgTy);
+ virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const {
+ return DL.getABITypeAlign(ArgTy);
}
/// If true, then instruction selection should seek to shrink the FP constant
@@ -1426,13 +1613,40 @@
return false;
}
+ /// LLT handling variant.
+ virtual bool allowsMisalignedMemoryAccesses(
+ LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool * /*Fast*/ = nullptr) const {
+ return false;
+ }
+
+ /// This function returns true if the memory access is aligned or if the
+ /// target allows this specific unaligned memory access. If the access is
+ /// allowed, the optional final parameter returns if the access is also fast
+ /// (as defined by the target).
+ bool allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT,
+ unsigned AddrSpace = 0, Align Alignment = Align(1),
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *Fast = nullptr) const;
+
+ /// Return true if the memory access of this type is aligned or if the target
+ /// allows this specific unaligned access for the given MachineMemOperand.
+ /// If the access is allowed, the optional final parameter returns if the
+ /// access is also fast (as defined by the target).
+ bool allowsMemoryAccessForAlignment(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO,
+ bool *Fast = nullptr) const;
+
/// Return true if the target supports a memory access of this type for the
/// given address space and alignment. If the access is allowed, the optional
/// final parameter returns if the access is also fast (as defined by the
/// target).
- bool
+ virtual bool
allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
- unsigned AddrSpace = 0, unsigned Alignment = 1,
+ unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const;
@@ -1446,23 +1660,21 @@
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
- ///
- /// If DstAlign is zero that means it's safe to destination alignment can
- /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
- /// a need to check it against alignment requirement, probably because the
- /// source does not need to be loaded. If 'IsMemset' is true, that means it's
- /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
- /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
- /// does not need to be loaded. It returns EVT::Other if the type should be
- /// determined using generic target-independent logic.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
- unsigned /*SrcAlign*/, bool /*IsMemset*/,
- bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
+ getOptimalMemOpType(const MemOp &Op,
const AttributeList & /*FuncAttributes*/) const {
return MVT::Other;
}
+ /// LLT returning variant.
+ virtual LLT
+ getOptimalMemOpLLT(const MemOp &Op,
+ const AttributeList & /*FuncAttributes*/) const {
+ return LLT();
+ }
+
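A hedged sketch of an override built only from the MemOp queries introduced earlier; MyTargetLowering and its 128-bit preference are illustrative, not a description of any in-tree target:

EVT MyTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  // Prefer 128-bit vector stores for large operations where both sides
  // are known to be 16-byte aligned.
  if (Op.size() >= 16 && Op.isAligned(Align(16)))
    return MVT::v2i64;
  return MVT::Other; // defer to the generic target-independent logic
}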
/// Returns true if it's safe to use load / store of the specified type to
/// expand memcpy / memset inline.
///
@@ -1472,16 +1684,6 @@
/// have to be legal as the hook is used before type legalization.
virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
- /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp.
- bool usesUnderscoreSetJmp() const {
- return UseUnderscoreSetJmp;
- }
-
- /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp.
- bool usesUnderscoreLongJmp() const {
- return UseUnderscoreLongJmp;
- }
-
/// Return lower limit for number of blocks in a jump table.
virtual unsigned getMinimumJumpTableEntries() const;
@@ -1492,65 +1694,45 @@
/// Zero if no limit.
unsigned getMaximumJumpTableSize() const;
- virtual bool isJumpTableRelative() const {
- return TM.isPositionIndependent();
- }
+ virtual bool isJumpTableRelative() const;
/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
- unsigned getStackPointerRegisterToSaveRestore() const {
+ Register getStackPointerRegisterToSaveRestore() const {
return StackPointerRegisterToSaveRestore;
}
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- virtual unsigned
+ virtual Register
getExceptionPointerRegister(const Constant *PersonalityFn) const {
- // 0 is guaranteed to be the NoRegister value on all targets
- return 0;
+ return Register();
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- virtual unsigned
+ virtual Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const {
- // 0 is guaranteed to be the NoRegister value on all targets
- return 0;
+ return Register();
}
virtual bool needsFixedCatchObjects() const {
report_fatal_error("Funclet EH is not implemented for this target");
}
- /// Returns the target's jmp_buf size in bytes (if never set, the default is
- /// 200)
- unsigned getJumpBufSize() const {
- return JumpBufSize;
- }
-
- /// Returns the target's jmp_buf alignment in bytes (if never set, the default
- /// is 0)
- unsigned getJumpBufAlignment() const {
- return JumpBufAlignment;
- }
-
/// Return the minimum stack alignment of an argument.
- unsigned getMinStackArgumentAlignment() const {
+ Align getMinStackArgumentAlignment() const {
return MinStackArgumentAlignment;
}
/// Return the minimum function alignment.
- unsigned getMinFunctionAlignment() const {
- return MinFunctionAlignment;
- }
+ Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
/// Return the preferred function alignment.
- unsigned getPrefFunctionAlignment() const {
- return PrefFunctionAlignment;
- }
+ Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
/// Return the preferred loop alignment.
- virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
+ virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
return PrefLoopAlignment;
}
@@ -1597,21 +1779,18 @@
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
+ virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }
+
+ virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }
+
virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
return "";
}
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
- return false;
- }
-
/// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
/// are happy to sink it into basic blocks. A cast may be free, but not
/// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
- virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
- return isNoopAddrSpaceCast(SrcAS, DestAS);
- }
+ virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;
/// Return true if the pointer arguments to CI should be aligned by aligning
/// the object whose address is being passed. If so then MinSize is set to the
@@ -1772,6 +1951,11 @@
return IsSigned;
}
+ /// Returns true if arguments should be extended in lib calls.
+ virtual bool shouldExtendTypeInLibCall(EVT Type) const {
+ return true;
+ }
+
/// Returns how the given (atomic) load should be expanded by the
/// IR-level AtomicExpand pass.
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
@@ -1814,6 +1998,18 @@
return ISD::ZERO_EXTEND;
}
+ /// Returns how the platform's atomic compare and swap expects its comparison
+ /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
+ /// separate from getExtendForAtomicOps, which is concerned with the
+ /// sign-extension of the instruction's output, whereas here we are concerned
+ /// with the sign-extension of the input. For targets with compare-and-swap
+ /// instructions (or sub-word comparisons in their LL/SC loop expansions),
+ /// the input can be ANY_EXTEND, but the output will still have a specific
+ /// extension.
+ virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
+ return ISD::ANY_EXTEND;
+ }
+
/// @}
/// Returns true if we should normalize
@@ -1848,7 +2044,8 @@
/// This may be true if the target does not directly support the
/// multiplication operation for the specified type or the sequence of simpler
/// ops is faster than the multiply.
- virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+ virtual bool decomposeMulByConstant(LLVMContext &Context,
+ EVT VT, SDValue C) const {
return false;
}
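A hedged sketch of an override using standard SelectionDAG helpers; the power-of-two-neighbor heuristic (15 = 16 - 1, 17 = 16 + 1) is purely illustrative:

bool MyTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                              SDValue C) const {
  // Allow mul-by-constant to become shift+add/sub when the immediate is
  // adjacent to a power of two.
  if (auto *CN = dyn_cast<ConstantSDNode>(C)) {
    const APInt &Imm = CN->getAPIntValue();
    return (Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2();
  }
  return false;
}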
@@ -1892,18 +2089,6 @@
SchedPreferenceInfo = Pref;
}
- /// Indicate whether this target prefers to use _setjmp to implement
- /// llvm.setjmp or the version without _. Defaults to false.
- void setUseUnderscoreSetJmp(bool Val) {
- UseUnderscoreSetJmp = Val;
- }
-
- /// Indicate whether this target prefers to use _longjmp to implement
- /// llvm.longjmp or the version without _. Defaults to false.
- void setUseUnderscoreLongJmp(bool Val) {
- UseUnderscoreLongJmp = Val;
- }
-
/// Indicate the minimum number of blocks to generate jump tables.
void setMinimumJumpTableEntries(unsigned Val);
@@ -1913,7 +2098,7 @@
/// If set to a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
- void setStackPointerRegisterToSaveRestore(unsigned R) {
+ void setStackPointerRegisterToSaveRestore(Register R) {
StackPointerRegisterToSaveRestore = R;
}
@@ -1995,13 +2180,8 @@
///
/// NOTE: All indexed mode loads are initialized to Expand in
/// TargetLowering.cpp
- void setIndexedLoadAction(unsigned IdxMode, MVT VT,
- LegalizeAction Action) {
- assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
- (unsigned)Action < 0xf && "Table isn't big enough!");
- // Load action are kept in the upper half.
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
+ void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
}
/// Indicate that the specified indexed store does or does not work with the
@@ -2009,13 +2189,28 @@
///
/// NOTE: All indexed mode stores are initialized to Expand in
/// TargetLowering.cpp
- void setIndexedStoreAction(unsigned IdxMode, MVT VT,
- LegalizeAction Action) {
- assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
- (unsigned)Action < 0xf && "Table isn't big enough!");
- // Store action are kept in the lower half.
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
+ void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
+ }
+
+ /// Indicate that the specified indexed masked load does or does not work with
+ /// the specified type and indicate what to do about it.
+ ///
+ /// NOTE: All indexed mode masked loads are initialized to Expand in
+ /// TargetLowering.cpp
+ void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
+ LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
+ }
+
+ /// Indicate that the specified indexed masked store does or does not work
+ /// with the specified type and indicate what to do about it.
+ ///
+ /// NOTE: All indexed mode masked stores are initialized to Expand in
+ /// TargetLowering.cpp
+ void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
+ LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
}
/// Indicate that the specified condition code is or isn't supported on the
@@ -2056,40 +2251,25 @@
TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
}
- /// Set the target's required jmp_buf buffer size (in bytes); default is 200
- void setJumpBufSize(unsigned Size) {
- JumpBufSize = Size;
- }
-
- /// Set the target's required jmp_buf buffer alignment (in bytes); default is
- /// 0
- void setJumpBufAlignment(unsigned Align) {
- JumpBufAlignment = Align;
- }
-
- /// Set the target's minimum function alignment (in log2(bytes))
- void setMinFunctionAlignment(unsigned Align) {
- MinFunctionAlignment = Align;
+ /// Set the target's minimum function alignment.
+ void setMinFunctionAlignment(Align Alignment) {
+ MinFunctionAlignment = Alignment;
}
/// Set the target's preferred function alignment. This should be set if
- /// there is a performance benefit to higher-than-minimum alignment (in
- /// log2(bytes))
- void setPrefFunctionAlignment(unsigned Align) {
- PrefFunctionAlignment = Align;
+ /// there is a performance benefit to higher-than-minimum alignment
+ void setPrefFunctionAlignment(Align Alignment) {
+ PrefFunctionAlignment = Alignment;
}
- /// Set the target's preferred loop alignment. Default alignment is zero, it
- /// means the target does not care about loop alignment. The alignment is
- /// specified in log2(bytes). The target may also override
- /// getPrefLoopAlignment to provide per-loop values.
- void setPrefLoopAlignment(unsigned Align) {
- PrefLoopAlignment = Align;
- }
+ /// Set the target's preferred loop alignment. The default alignment of one
+ /// means the target does not care about loop alignment. The target may also
+ /// override getPrefLoopAlignment to provide per-loop values.
+ void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
- /// Set the minimum stack alignment of an argument (in log2(bytes)).
- void setMinStackArgumentAlignment(unsigned Align) {
- MinStackArgumentAlignment = Align;
+ /// Set the minimum stack alignment of an argument.
+ void setMinStackArgumentAlignment(Align Alignment) {
+ MinStackArgumentAlignment = Alignment;
}
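A sketch of the corresponding setter migration in a hypothetical target constructor; the values are illustrative, with the old log2-based calls shown in comments for contrast:

// In MyTargetLowering's constructor (hypothetical target):
setMinFunctionAlignment(Align(4));   // previously setMinFunctionAlignment(2)
setPrefFunctionAlignment(Align(16)); // previously setPrefFunctionAlignment(4)
setPrefLoopAlignment(Align(16));     // Align(1) means "don't care"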
/// Set the maximum atomic operation size supported by the
@@ -2193,13 +2373,31 @@
}
/// Return true if it's significantly cheaper to shift a vector by a uniform
- /// scalar than by an amount which will vary across each lane. On x86, for
- /// example, there is a "psllw" instruction for the former case, but no simple
- /// instruction for a general "a << b" operation on vectors.
+ /// scalar than by an amount which will vary across each lane. On x86 before
+ /// AVX2 for example, there is a "psllw" instruction for the former case, but
+ /// no simple instruction for a general "a << b" operation on vectors.
+ /// This should also apply to lowering for vector funnel shifts (rotates).
virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
return false;
}
+ /// Given a shuffle vector SVI representing a vector splat, return a new
+ /// scalar type of size equal to SVI's scalar type if the new type is more
+ /// profitable. Returns nullptr otherwise. For example under MVE float splats
+ /// are converted to integer to prevent the need to move from SPR to GPR
+ /// registers.
+ virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const {
+ return nullptr;
+ }
+
+ /// Given a set of interconnected phis of type 'From' that are loaded/stored
+ /// or bitcast to type 'To', return true if the set should be converted to
+ /// 'To'.
+ virtual bool shouldConvertPhiType(Type *From, Type *To) const {
+ return (From->isIntegerTy() || From->isFloatingPointTy()) &&
+ (To->isIntegerTy() || To->isFloatingPointTy());
+ }
+
/// Returns true if the opcode is a commutative binary operation.
virtual bool isCommutativeBinOp(unsigned Opcode) const {
// FIXME: This should get its info from the td file.
@@ -2396,7 +2594,7 @@
/// this information should not be provided because it will generate more
/// loads.
virtual bool hasPairedLoad(EVT /*LoadedType*/,
- unsigned & /*RequiredAlignment*/) const {
+ Align & /*RequiredAlignment*/) const {
return false;
}
@@ -2451,7 +2649,8 @@
/// Return true if an fpext operation input to an \p Opcode operation is free
/// (for instance, because half-precision floating-point numbers are
/// implicitly extended to float-precision) for an FMA instruction.
- virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const {
+ virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
+ EVT DestVT, EVT SrcVT) const {
assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"invalid fpext types");
return isFPExtFree(DestVT, SrcVT);
@@ -2483,10 +2682,26 @@
/// not legal, but should return true if those types will eventually legalize
/// to types that support FMAs. After legalization, it will only be called on
/// types that support FMAs (via Legal or Custom actions)
- virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
+ virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT) const {
return false;
}
+ /// IR version
+ virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
+ return false;
+ }
+
+ /// Returns true if \p N can be combined with other nodes to form an
+ /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
+ /// will be distributed into an fadd/fsub.
+ virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
+ assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
+ N->getOpcode() == ISD::FMUL) &&
+ "unexpected node in FMAD forming combine");
+ return isOperationLegal(ISD::FMAD, N->getValueType(0));
+ }
+
/// Return true if it's profitable to narrow operations of type VT1 to
/// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
/// i32 to i16.
@@ -2531,17 +2746,21 @@
/// node operation. Targets may want to override this independently of whether
/// the operation is legal/custom for the given type because it may obscure
/// matching of other patterns.
- virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+ virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool MathUsed) const {
// TODO: The default logic is inherited from code in CodeGenPrepare.
// The opcode should not make a difference by default?
if (Opcode != ISD::UADDO)
return false;
// Allow the transform as long as we have an integer type that is not
- // obviously illegal and unsupported.
+ // obviously illegal and unsupported, and if the math result is used
+ // besides the overflow check. On some targets (e.g. SPARC), it is
+ // not profitable to form an overflow op if the math result has no
+ // concrete users.
if (VT.isVector())
return false;
- return VT.isSimple() || !isOperationExpand(Opcode, VT);
+ return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
}
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
@@ -2555,6 +2774,12 @@
// same blocks of its users.
virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
+ /// Return true if creating a shift of the type by the given
+ /// amount is not profitable.
+ virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
@@ -2597,6 +2822,13 @@
/// The default implementation just freezes the set of reserved registers.
virtual void finalizeLowering(MachineFunction &MF) const;
+ //===----------------------------------------------------------------------===//
+ // GlobalISel Hooks
+ //===----------------------------------------------------------------------===//
+ /// Check whether or not \p MI needs to be moved close to its uses.
+ virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const;
+
+
private:
const TargetMachine &TM;
@@ -2624,16 +2856,6 @@
/// predication.
bool JumpIsExpensive;
- /// This target prefers to use _setjmp to implement llvm.setjmp.
- ///
- /// Defaults to false.
- bool UseUnderscoreSetJmp;
-
- /// This target prefers to use _longjmp to implement llvm.longjmp.
- ///
- /// Defaults to false.
- bool UseUnderscoreLongJmp;
-
/// Information about the contents of the high-bits in boolean values held in
/// a type wider than i1. See getBooleanContents.
BooleanContent BooleanContents;
@@ -2650,25 +2872,19 @@
/// register usage.
Sched::Preference SchedPreferenceInfo;
- /// The size, in bytes, of the target's jmp_buf buffers
- unsigned JumpBufSize;
-
- /// The alignment, in bytes, of the target's jmp_buf buffers
- unsigned JumpBufAlignment;
-
/// The minimum alignment that any argument on the stack needs to have.
- unsigned MinStackArgumentAlignment;
+ Align MinStackArgumentAlignment;
/// The minimum function alignment (used when optimizing for size, and to
/// prevent explicitly provided alignment from leading to incorrect code).
- unsigned MinFunctionAlignment;
+ Align MinFunctionAlignment;
/// The preferred function alignment (used when alignment unspecified and
/// optimizing for speed).
- unsigned PrefFunctionAlignment;
+ Align PrefFunctionAlignment;
- /// The preferred loop alignment.
- unsigned PrefLoopAlignment;
+ /// The preferred loop alignment (in bytes, not log2).
+ Align PrefLoopAlignment;
/// Size in bits of the maximum atomics size the backend supports.
/// Accesses larger than this will be expanded by AtomicExpandPass.
@@ -2683,12 +2899,12 @@
/// If set to a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
- unsigned StackPointerRegisterToSaveRestore;
+ Register StackPointerRegisterToSaveRestore;
/// This indicates the default register class to use for each ValueType the
/// target supports natively.
const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
- unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
+ uint16_t NumRegistersForVT[MVT::LAST_VALUETYPE];
MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
/// This indicates the "representative" register class to use for each
@@ -2728,13 +2944,13 @@
/// truncating store of a specific value type and truncating type is legal.
LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
- /// For each indexed mode and each value type, keep a pair of LegalizeAction
+ /// For each indexed mode and each value type, keep a quad of LegalizeAction
/// that indicates how instruction selection should deal with the load /
- /// store.
+ /// store / masked load / masked store.
///
/// The first dimension is the value_type for the reference. The second
/// dimension represents the various modes for load store.
- uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
+ uint16_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
/// For each condition code (ISD::CondCode) keep a LegalizeAction that
/// indicates how instruction selection should deal with the condition code.
@@ -2744,7 +2960,6 @@
/// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
-protected:
ValueTypeActionImpl ValueTypeActions;
private:
@@ -2778,6 +2993,32 @@
/// Set default libcall names and calling conventions.
void InitLibcalls(const Triple &TT);
+ /// The bits of IndexedModeActions used to store the legalisation actions.
+ /// We store the data as | ML | MS | L | S |, with each field taking 4 bits.
+ enum IndexedModeActionsBits {
+ IMAB_Store = 0,
+ IMAB_Load = 4,
+ IMAB_MaskedStore = 8,
+ IMAB_MaskedLoad = 12
+ };
+
+ void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
+ LegalizeAction Action) {
+ assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
+ (unsigned)Action < 0xf && "Table isn't big enough!");
+ unsigned Ty = (unsigned)VT.SimpleTy;
+ IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
+ IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
+ }
+
+ LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
+ unsigned Shift) const {
+ assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
+ "Table isn't big enough!");
+ unsigned Ty = (unsigned)VT.SimpleTy;
+ return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
+ }
+
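A standalone illustration of the 4-bit packing laid out above (| ML | MS | L | S |); the numeric actions stand in for LegalizeAction enumerators:

#include <cstdint>
#include <cstdio>

enum { IMAB_Store = 0, IMAB_Load = 4, IMAB_MaskedStore = 8, IMAB_MaskedLoad = 12 };

static uint16_t setAction(uint16_t Packed, unsigned Shift, uint16_t Action) {
  Packed &= ~(uint16_t)(0xf << Shift); // clear the 4-bit field
  return Packed | (uint16_t)(Action << Shift);
}

static unsigned getAction(uint16_t Packed, unsigned Shift) {
  return (Packed >> Shift) & 0xf;
}

int main() {
  uint16_t P = 0;
  P = setAction(P, IMAB_Load, 1);        // e.g. 1 == Legal
  P = setAction(P, IMAB_MaskedStore, 3); // e.g. 3 == Expand
  std::printf("%u %u\n", getAction(P, IMAB_Load),
              getAction(P, IMAB_MaskedStore)); // prints: 1 3
}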
protected:
/// Return true if the extension represented by \p I is free.
/// \pre \p I is a sign, zero, or fp extension and
@@ -2790,7 +3031,7 @@
/// expected to be merged.
unsigned GatherAllAliasesMaxDepth;
- /// Specify maximum number of store instructions per memset call.
+ /// \brief Specify maximum number of store instructions per memset call.
///
/// When lowering \@llvm.memset this field specifies the maximum number of
/// store operations that may be substituted for the call to memset. Targets
@@ -2801,12 +3042,10 @@
/// with 16-bit alignment would result in four 2-byte stores and one 1-byte
/// store. This only applies to setting a constant array of a constant size.
unsigned MaxStoresPerMemset;
-
- /// Maximum number of stores operations that may be substituted for the call
- /// to memset, used for functions with OptSize attribute.
+ /// Likewise for functions with the OptSize attribute.
unsigned MaxStoresPerMemsetOptSize;
- /// Specify maximum bytes of store instructions per memcpy call.
+ /// \brief Specify maximum number of store instructions per memcpy call.
///
/// When lowering \@llvm.memcpy this field specifies the maximum number of
/// store operations that may be substituted for a call to memcpy. Targets
@@ -2818,8 +3057,8 @@
/// and one 1-byte store. This only applies to copying a constant array of
/// constant size.
unsigned MaxStoresPerMemcpy;
-
-
+ /// Likewise for functions with the OptSize attribute.
+ unsigned MaxStoresPerMemcpyOptSize;
/// \brief Specify max number of store instructions to glue in inlined memcpy.
///
/// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
@@ -2827,13 +3066,22 @@
// vectorization later on.
unsigned MaxGluedStoresPerMemcpy = 0;
- /// Maximum number of store operations that may be substituted for a call to
- /// memcpy, used for functions with OptSize attribute.
- unsigned MaxStoresPerMemcpyOptSize;
+ /// \brief Specify maximum number of load instructions per memcmp call.
+ ///
+ /// When lowering \@llvm.memcmp this field specifies the maximum number of
+ /// pairs of load operations that may be substituted for a call to memcmp.
+ /// Targets must set this value based on the cost threshold for that target.
+ /// Targets should assume that the memcmp will be done using as many of the
+ /// largest load operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
+ /// with 32-bit alignment would result in one 4-byte load, one 2-byte load,
+ /// and one 1-byte load. This only applies to comparing a constant array of
+ /// constant size.
unsigned MaxLoadsPerMemcmp;
+ /// Likewise for functions with the OptSize attribute.
unsigned MaxLoadsPerMemcmpOptSize;
- /// Specify maximum bytes of store instructions per memmove call.
+ /// \brief Specify maximum number of store instructions per memmove call.
///
/// When lowering \@llvm.memmove this field specifies the maximum number of
/// store instructions that may be substituted for a call to memmove. Targets
@@ -2844,9 +3092,7 @@
/// with 8-bit alignment would result in nine 1-byte stores. This only
/// applies to copying a constant array of constant size.
unsigned MaxStoresPerMemmove;
-
- /// Maximum number of store instructions that may be substituted for a call to
- /// memmove, used for functions with OptSize attribute.
+ /// Likewise for functions with the OptSize attribute.
unsigned MaxStoresPerMemmoveOptSize;
/// Tells the code generator that select is more expensive than a branch if
@@ -2875,6 +3121,8 @@
/// details.
MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+
+ bool IsStrictFPEnabled;
};
/// This class defines information used to lower LLVM code to legal SelectionDAG
@@ -2885,11 +3133,11 @@
class TargetLowering : public TargetLoweringBase {
public:
struct DAGCombinerInfo;
+ struct MakeLibCallOptions;
TargetLowering(const TargetLowering &) = delete;
TargetLowering &operator=(const TargetLowering &) = delete;
- /// NOTE: The TargetMachine owns TLOF.
explicit TargetLowering(const TargetMachine &TM);
bool isPositionIndependent() const;
@@ -2925,6 +3173,14 @@
return false;
}
+ /// Returns true if the specified base+offset is a legal indexed addressing
+ /// mode for this target. \p MI is the load or store instruction that is being
+ /// considered for transformation.
+ virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const {
+ return false;
+ }
+
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
virtual unsigned getJumpTableEncoding() const;
@@ -2955,14 +3211,22 @@
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
SDValue &NewRHS, ISD::CondCode &CCCode,
- const SDLoc &DL) const;
+ const SDLoc &DL, const SDValue OldLHS,
+ const SDValue OldRHS) const;
+
+ void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
+ SDValue &NewRHS, ISD::CondCode &CCCode,
+ const SDLoc &DL, const SDValue OldLHS,
+ const SDValue OldRHS, SDValue &Chain,
+ bool IsSignaling = false) const;
/// Returns a pair of (return value, chain).
/// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
- std::pair<SDValue, SDValue> makeLibCall(
- SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops,
- bool isSigned, const SDLoc &dl, bool doesNotReturn = false,
- bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const;
+ std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
+ EVT RetVT, ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions,
+ const SDLoc &dl,
+ SDValue Chain = SDValue()) const;
/// Check whether parameters to a call that are passed in callee saved
/// registers are the same as from the calling function. This needs to be
@@ -3004,27 +3268,28 @@
/// Return true if the number of memory ops is below the threshold (Limit).
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
- bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- bool AllowOverlap,
- unsigned DstAS, unsigned SrcAS,
+ bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
+ const MemOp &Op, unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes) const;
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return
/// true.
- bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const;
+
+ /// Helper wrapper around ShrinkDemandedConstant, demanding all elements.
+ bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
TargetLoweringOpt &TLO) const;
// Target hook to do target-specific const optimization, which is called by
// ShrinkDemandedConstant. This function should return true if the target
// doesn't want ShrinkDemandedConstant to further optimize the constant.
- virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ virtual bool targetShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
return false;
}
@@ -3062,9 +3327,30 @@
/// Helper wrapper around SimplifyDemandedBits.
/// Adds Op back to the worklist upon success.
- bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const;
+ /// More limited version of SimplifyDemandedBits that can be used to "look
+ /// through" ops that don't contribute to the DemandedBits/DemandedElts -
+ /// bitwise ops etc.
+ SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ SelectionDAG &DAG,
+ unsigned Depth) const;
+
+ /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
+ /// elements.
+ SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
+ SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
+ /// bits from only some vector elements.
+ SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
+ SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
/// Look at Vector Op. At this point, we know that only the DemandedElts
/// elements of the result of Op are ever used downstream. If we can use
/// this information to simplify Op, create a new simplified DAG node and
@@ -3100,14 +3386,31 @@
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+ /// Determine which of the bits specified in Mask are known to be either zero
+ /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
+ /// argument allows us to only collect the known bits that are shared by the
+ /// requested vector elements. This is for GISel.
+ virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
+ Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth = 0) const;
+
+ /// Determine the known alignment for the pointer value \p R. This can
+ /// typically be inferred from the number of low known 0 bits. However, for a
+ /// pointer with a non-integral address space, the alignment value may be
+ /// independent from the known low bits.
+ virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
+ Register R,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth = 0) const;
+
/// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
/// Default implementation computes low bits based on alignment
/// information. This should preserve known bits passed into it.
- virtual void computeKnownBitsForFrameIndex(const SDValue FIOp,
+ virtual void computeKnownBitsForFrameIndex(int FIOp,
KnownBits &Known,
- const APInt &DemandedElts,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
+ const MachineFunction &MF) const;
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner. The DemandedElts
@@ -3118,6 +3421,16 @@
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+ /// This method can be implemented by targets that want to expose additional
+ /// information about sign bits to GlobalISel combiners. The DemandedElts
+ /// argument allows us to only collect the minimum sign bits that are shared
+ /// by the requested vector elements.
+ virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
+ Register R,
+ const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth = 0) const;
+
/// Attempt to simplify any target nodes based on the demanded vector
/// elements, returning true on success. Otherwise, analyze the expression and
/// return a mask of KnownUndef and KnownZero elements for the expression
@@ -3139,6 +3452,21 @@
TargetLoweringOpt &TLO,
unsigned Depth = 0) const;
+ /// More limited version of SimplifyDemandedBits that can be used to "look
+ /// through" ops that don't contribute to the DemandedBits/DemandedElts -
+ /// bitwise ops etc.
+ virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const;
+
+ /// Tries to build a legal vector shuffle using the provided parameters
+ /// or equivalent variations. The Mask argument may be modified as the
+ /// function tries different variations.
+ /// Returns an empty SDValue if the operation fails.
+ SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
+ SDValue N1, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG) const;
+
/// This method returns the constant pool value that will be loaded by LD.
/// NOTE: You must check for implicit extensions of the constant by LD.
virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
@@ -3163,9 +3491,7 @@
bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
- bool isAfterLegalizeDAG() const {
- return Level == AfterLegalizeDAG;
- }
+ bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; }
CombineLevel getDAGCombineLevel() { return Level; }
bool isCalledByLegalizer() const { return CalledByLegalizer; }
@@ -3174,6 +3500,8 @@
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
+ bool recursivelyDeleteUnusedNodes(SDNode *N);
+
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
};
@@ -3230,20 +3558,6 @@
return true;
}
- // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
- // to a shuffle and a truncate.
- // Example of such a combine:
- // v4i32 build_vector((extract_elt V, 1),
- // (extract_elt V, 3),
- // (extract_elt V, 5),
- // (extract_elt V, 7))
- // -->
- // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
- virtual bool isDesirableToCombineBuildVectorToShuffleTruncate(
- ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
- return false;
- }
-
/// Return true if the target has native support for the specified value type
/// and it is 'desirable' to use the type for the given node type. e.g. On x86
/// i16 is legal, but undesirable since i16 instruction encodings are longer
@@ -3297,11 +3611,61 @@
llvm_unreachable("Not Implemented");
}
+ /// Return the newly negated expression if the cost is not expensive, and
+ /// set \p Cost to indicate whether it is cheaper or cost-neutral to perform
+ /// the negation.
+ virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOps, bool OptForSize,
+ NegatibleCost &Cost,
+ unsigned Depth = 0) const;
+
+ /// Helper function that returns the newly negated expression only when the
+ /// cost is cheaper.
+ SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOps, bool OptForSize,
+ unsigned Depth = 0) const {
+ NegatibleCost Cost = NegatibleCost::Expensive;
+ SDValue Neg =
+ getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
+ if (Neg && Cost == NegatibleCost::Cheaper)
+ return Neg;
+ // Remove the newly created node to avoid side effects on the DAG.
+ if (Neg && Neg.getNode()->use_empty())
+ DAG.RemoveDeadNode(Neg.getNode());
+ return SDValue();
+ }
+
+ /// Helper function that returns the newly negated expression if the cost is
+ /// not expensive.
+ SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
+ bool OptForSize, unsigned Depth = 0) const {
+ NegatibleCost Cost = NegatibleCost::Expensive;
+ return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
+ }
+
//===--------------------------------------------------------------------===//
// Lowering methods - These methods must be implemented by targets so that
// the SelectionDAGBuilder code knows how to lower these.
//
+ /// Target-specific splitting of values into parts that fit a register
+ /// storing a legal type
+ virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT,
+ Optional<CallingConv::ID> CC) const {
+ return false;
+ }
+
+ /// Target-specific combining of register parts into their original value
+ virtual SDValue
+ joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT,
+ Optional<CallingConv::ID> CC) const {
+ return SDValue();
+ }
+
/// This hook must be implemented to lower the incoming (formal) arguments,
/// described by the Ins array, into the specified DAG. The implementation
/// should fill in the InVals array with legal-type argument values, and
@@ -3328,6 +3692,8 @@
bool IsReturnValueUsed : 1;
bool IsConvergent : 1;
bool IsPatchPoint : 1;
+ bool IsPreallocated : 1;
+ bool NoMerge : 1;
// IsTailCall should be modified by implementations of
// TargetLowering::LowerCall that perform tail call conversions.
@@ -3342,7 +3708,7 @@
ArgListTy Args;
SelectionDAG &DAG;
SDLoc DL;
- ImmutableCallSite CS;
+ const CallBase *CB = nullptr;
SmallVector<ISD::OutputArg, 32> Outs;
SmallVector<SDValue, 32> OutVals;
SmallVector<ISD::InputArg, 32> Ins;
@@ -3351,7 +3717,8 @@
CallLoweringInfo(SelectionDAG &DAG)
: RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
- IsPatchPoint(false), DAG(DAG) {}
+ IsPatchPoint(false), IsPreallocated(false), NoMerge(false),
+ DAG(DAG) {}
CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
DL = dl;
@@ -3389,26 +3756,26 @@
CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
SDValue Target, ArgListTy &&ArgsList,
- ImmutableCallSite Call) {
+ const CallBase &Call) {
RetTy = ResultType;
IsInReg = Call.hasRetAttr(Attribute::InReg);
DoesNotReturn =
Call.doesNotReturn() ||
- (!Call.isInvoke() &&
- isa<UnreachableInst>(Call.getInstruction()->getNextNode()));
+ (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode()));
IsVarArg = FTy->isVarArg();
- IsReturnValueUsed = !Call.getInstruction()->use_empty();
+ IsReturnValueUsed = !Call.use_empty();
RetSExt = Call.hasRetAttr(Attribute::SExt);
RetZExt = Call.hasRetAttr(Attribute::ZExt);
-
+ NoMerge = Call.hasFnAttr(Attribute::NoMerge);
+
Callee = Target;
CallConv = Call.getCallingConv();
NumFixedArgs = FTy->getNumParams();
Args = std::move(ArgsList);
- CS = Call;
+ CB = &Call;
return *this;
}
@@ -3458,6 +3825,11 @@
return *this;
}
+ CallLoweringInfo &setIsPreallocated(bool Value = true) {
+ IsPreallocated = Value;
+ return *this;
+ }
+
CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
IsPostTypeLegalization = Value;
return *this;
@@ -3468,6 +3840,51 @@
}
};
+ /// This structure is used to pass arguments to the makeLibCall function.
+ struct MakeLibCallOptions {
+ // By passing the type list before softening to makeLibCall, the target hook
+ // shouldExtendTypeInLibCall can query the original type before softening.
+ ArrayRef<EVT> OpsVTBeforeSoften;
+ EVT RetVTBeforeSoften;
+ bool IsSExt : 1;
+ bool DoesNotReturn : 1;
+ bool IsReturnValueUsed : 1;
+ bool IsPostTypeLegalization : 1;
+ bool IsSoften : 1;
+
+ MakeLibCallOptions()
+ : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
+ IsPostTypeLegalization(false), IsSoften(false) {}
+
+ MakeLibCallOptions &setSExt(bool Value = true) {
+ IsSExt = Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setNoReturn(bool Value = true) {
+ DoesNotReturn = Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setDiscardResult(bool Value = true) {
+ IsReturnValueUsed = !Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
+ IsPostTypeLegalization = Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
+ bool Value = true) {
+ OpsVTBeforeSoften = OpsVT;
+ RetVTBeforeSoften = RetVT;
+ IsSoften = Value;
+ return *this;
+ }
+ };
+
/// This function lowers an abstract call to a function into an actual call.
/// This returns a pair of operands. The first element is the return value
/// for the function (if RetTy is not VoidTy). The second element is the
@@ -3486,7 +3903,7 @@
}
/// Target-specific cleanup for formal ByVal parameters.
- virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
+ virtual void HandleByVal(CCState *, unsigned &, Align) const {}
/// This hook should be implemented to check whether the return values
/// described by the Outs array can fit into the return registers. If false
@@ -3537,8 +3954,8 @@
/// Return the register ID of the name passed in. Used by named register
/// global variables extension. There is no target-independent behaviour
/// so the default action is to bail.
- virtual unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
+ virtual Register getRegisterByName(const char* RegName, LLT Ty,
+ const MachineFunction &MF) const {
report_fatal_error("Named registers not implemented for this target");
}
@@ -3590,13 +4007,25 @@
return Chain;
}
- /// This callback is used to inspect load/store instructions and add
- /// target-specific MachineMemOperand flags to them. The default
- /// implementation does nothing.
- virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const {
- return MachineMemOperand::MONone;
+ /// Should SelectionDAG lower an atomic store of the given kind as a normal
+ /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
+ /// eventually migrate all targets to using StoreSDNodes, but porting is
+ /// being done one target at a time.
+ virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
+ assert(SI.isAtomic() && "violated precondition");
+ return false;
}
+ /// Should SelectionDAG lower an atomic load of the given kind as a normal
+ /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
+ /// eventually migrate all targets to using LoadSDNodes, but porting is
+ /// being done one target at a time.
+ virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+ assert(LI.isAtomic() && "violated precondition");
+ return false;
+ }
+
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do
@@ -3665,6 +4094,7 @@
C_Register, // Constraint represents specific register(s).
C_RegisterClass, // Constraint represents any of register(s) in class.
C_Memory, // Memory constraint.
+ C_Immediate, // Requires an immediate.
C_Other, // Something else.
C_Unknown // Unsupported constraint.
};
@@ -3725,7 +4155,7 @@
/// string itself isn't empty, there was an error parsing.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
- ImmutableCallSite CS) const;
+ const CallBase &Call) const;
/// Examine constraint type and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -3762,9 +4192,7 @@
StringRef Constraint, MVT VT) const;
virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const {
- if (ConstraintCode == "i")
- return InlineAsm::Constraint_i;
- else if (ConstraintCode == "m")
+ if (ConstraintCode == "m")
return InlineAsm::Constraint_m;
return InlineAsm::Constraint_Unknown;
}
@@ -3782,7 +4210,7 @@
// Lower custom output constraints. If invalid, return SDValue().
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
- SDLoc DL,
+ const SDLoc &DL,
const AsmOperandInfo &OpInfo,
SelectionDAG &DAG) const;
@@ -3849,6 +4277,22 @@
return SDValue();
}
+ /// Return a target-dependent comparison result if the input operand is
+ /// suitable for use with a square root estimate calculation. For example, the
+ /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
+ /// result should be used as the condition operand for a select or branch.
+ virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ return SDValue();
+ }
+
+ /// Return a target-dependent result if the input operand is not suitable for
+ /// use with a square root estimate calculation.
+ virtual SDValue getSqrtResultForDenormInput(SDValue Operand,
+ SelectionDAG &DAG) const {
+ return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
+ }
+
//===--------------------------------------------------------------------===//
// Legalization utility functions
//
@@ -3863,7 +4307,7 @@
/// \param RL Low bits of the RHS of the MUL. See LL for meaning
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not
- bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS,
+ bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
SelectionDAG &DAG, MulExpansionKind Kind,
SDValue LL = SDValue(), SDValue LH = SDValue(),
@@ -3891,9 +4335,12 @@
/// Expand rotations.
/// \param N Node to expand
+ /// \param AllowVectorOps expand vector rotates; this should only be done
+ /// when legalization is happening outside of LegalizeVectorOps
/// \param Result output after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandROT(SDNode *N, bool AllowVectorOps, SDValue &Result,
+ SelectionDAG &DAG) const;
/// Expand float(f32) to SINT(i64) conversion
/// \param N Node to expand
@@ -3904,18 +4351,27 @@
/// Expand float to UINT conversion
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
+ SelectionDAG &DAG) const;
/// Expand UINT(i64) to double(f64) conversion
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
+ SelectionDAG &DAG) const;
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
+ /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
+ /// \param N Node to expand
+ /// \returns The expansion result
+ SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;
+
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
@@ -3942,17 +4398,20 @@
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param IsNegative indicates whether to expand a negated abs
/// \returns True, if the expansion was successful, false otherwise
- bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG,
+ bool IsNegative = false) const;
/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
- /// \returns MERGE_VALUEs of the scalar loads with their chains.
- SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const;
+ /// \returns BUILD_VECTOR and TokenFactor nodes.
+ std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const;
// Turn a store of a vector type into stores of the individual elements.
/// \param ST Store with a vector value type
- /// \returns MERGE_VALUs of the individual store chains.
+ /// \returns TokenFactor of the individual store chains.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
/// Expands an unaligned load to 2 half-size loads for an integer, and
@@ -3982,14 +4441,30 @@
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
SDValue Index) const;
+ /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
+ /// method accepts integers as its arguments.
+ SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;
+
/// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
/// method accepts integers as its arguments.
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
- /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
- /// integers as its arguments.
+ /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
+ /// method accepts integers as its arguments.
+ SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;
+
+ /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
+ /// method accepts integers as its arguments.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
+ /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
+ /// method accepts integers as its arguments.
+ /// Note: This method may fail if the division could not be performed
+ /// within the type. Clients must retry with a wider type if this happens.
+ SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
+ SDValue LHS, SDValue RHS,
+ unsigned Scale, SelectionDAG &DAG) const;
+
/// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
@@ -4009,6 +4484,13 @@
/// only the first Count elements of the vector are used.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
+ /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
+ SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
+
+ /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
+ /// Returns true if the expansion was successful.
+ bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
+
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
@@ -4059,6 +4541,10 @@
// combiner can fold the new nodes.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
+ // Give targets the chance to reduce the number of distinct addressing modes.
+ ISD::MemIndexType getCanonicalIndexType(ISD::MemIndexType IndexType,
+ EVT MemVT, SDValue Offsets) const;
+
private:
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, DAGCombinerInfo &DCI) const;
@@ -4070,6 +4556,11 @@
DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const;
+
SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
@@ -4077,6 +4568,14 @@
SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+
+ SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const;
+ SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
+ ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value
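For reference, a minimal usage sketch of the new MakeLibCallOptions-based
makeLibCall introduced above. TLI, DAG, Ops, and dl stand for caller-side
context, and RTLIB::SDIV_I64 / MVT::i64 are arbitrary example choices, none
of which are part of this change:

    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(true)        // operands are sign-extended (was: isSigned)
        .setDiscardResult(false);    // keep the return value
    std::pair<SDValue, SDValue> Call =
        TLI.makeLibCall(DAG, RTLIB::SDIV_I64, MVT::i64, Ops, CallOptions, dl);
    SDValue Result = Call.first;     // return value
    SDValue OutChain = Call.second;  // output chain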
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/linux-x64/clang/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index a1fb81c..31e08b7 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -14,18 +14,19 @@
#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
-#include "llvm/IR/Module.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
class GlobalValue;
class MachineModuleInfo;
-class Mangler;
+class MachineFunction;
class MCContext;
+class MCExpr;
class MCSection;
class MCSymbol;
+class Module;
class TargetMachine;
class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
@@ -35,10 +36,9 @@
protected:
MCSymbolRefExpr::VariantKind PLTRelativeVariantKind =
MCSymbolRefExpr::VK_None;
- const TargetMachine *TM;
public:
- TargetLoweringObjectFileELF() = default;
+ TargetLoweringObjectFileELF();
~TargetLoweringObjectFileELF() override = default;
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
@@ -53,7 +53,7 @@
/// placed in.
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override;
+ Align &Alignment) const override;
MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
@@ -63,6 +63,13 @@
MCSection *getSectionForJumpTable(const Function &F,
const TargetMachine &TM) const override;
+ MCSection *getSectionForLSDA(const Function &F,
+ const TargetMachine &TM) const override;
+
+ MCSection *
+ getSectionForMachineBasicBlock(const Function &F,
+ const MachineBasicBlock &MBB,
+ const TargetMachine &TM) const override;
bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
const Function &F) const override;
@@ -90,6 +97,9 @@
const GlobalValue *RHS,
const TargetMachine &TM) const override;
+ const MCExpr *lowerDSOLocalEquivalent(const DSOLocalEquivalent *Equiv,
+ const TargetMachine &TM) const override;
+
MCSection *getSectionForCommandLines() const override;
};
@@ -111,7 +121,7 @@
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override;
+ Align &Alignment) const override;
/// The mach-o version of this method defaults to returning a stub reference.
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
@@ -126,7 +136,8 @@
MachineModuleInfo *MMI) const override;
/// Get MachO PC relative GOT entry relocation
- const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV,
+ const MCSymbol *Sym,
const MCValue &MV, int64_t Offset,
MachineModuleInfo *MMI,
MCStreamer &Streamer) const override;
@@ -137,6 +148,7 @@
class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
mutable unsigned NextUniqueID = 0;
+ const TargetMachine *TM = nullptr;
public:
~TargetLoweringObjectFileCOFF() override = default;
@@ -162,12 +174,6 @@
MCSection *getStaticDtorSection(unsigned Priority,
const MCSymbol *KeySym) const override;
- void emitLinkerFlagsForGlobal(raw_ostream &OS,
- const GlobalValue *GV) const override;
-
- void emitLinkerFlagsForUsed(raw_ostream &OS,
- const GlobalValue *GV) const override;
-
const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
const GlobalValue *RHS,
const TargetMachine &TM) const override;
@@ -176,7 +182,10 @@
/// information, return a section that it should be placed in.
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override;
+ Align &Alignment) const override;
+
+private:
+ void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const;
};
class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile {
@@ -206,6 +215,66 @@
const TargetMachine &TM) const override;
};
+class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile {
+public:
+ TargetLoweringObjectFileXCOFF() = default;
+ ~TargetLoweringObjectFileXCOFF() override = default;
+
+ static bool ShouldEmitEHBlock(const MachineFunction *MF);
+
+ static MCSymbol *getEHInfoTableSymbol(const MachineFunction *MF);
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
+ const Function &F) const override;
+
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
+
+ MCSection *getStaticCtorSection(unsigned Priority,
+ const MCSymbol *KeySym) const override;
+ MCSection *getStaticDtorSection(unsigned Priority,
+ const MCSymbol *KeySym) const override;
+
+ const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
+ const GlobalValue *RHS,
+ const TargetMachine &TM) const override;
+
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
+
+ MCSection *getSectionForJumpTable(const Function &F,
+ const TargetMachine &TM) const override;
+
+ /// Given a constant with the SectionKind, return a section that it should be
+ /// placed in.
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+ const Constant *C,
+ Align &Alignment) const override;
+
+ static XCOFF::StorageClass getStorageClassForGlobal(const GlobalValue *GV);
+
+ MCSection *
+ getSectionForFunctionDescriptor(const Function *F,
+ const TargetMachine &TM) const override;
+ MCSection *getSectionForTOCEntry(const MCSymbol *Sym,
+ const TargetMachine &TM) const override;
+
+ /// For external functions, this will always return a function descriptor
+ /// csect.
+ MCSection *
+ getSectionForExternalReference(const GlobalObject *GO,
+ const TargetMachine &TM) const override;
+
+ /// For functions, this will always return a function descriptor symbol.
+ MCSymbol *getTargetSymbol(const GlobalValue *GV,
+ const TargetMachine &TM) const override;
+
+ MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func,
+ const TargetMachine &TM) const override;
+};
+
} // end namespace llvm
#endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
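As a sketch of the Align migration in this header: a hypothetical out-of-tree
subclass (MyTLOF is not a real class) updates its override from unsigned
&Align to the new Align &Alignment reference, a typed power-of-two wrapper
rather than a raw byte count:

    MCSection *MyTLOF::getSectionForConstant(const DataLayout &DL,
                                             SectionKind Kind,
                                             const Constant *C,
                                             Align &Alignment) const {
      // Illustrative only: request at least 16-byte alignment for constants.
      if (Alignment < Align(16))
        Alignment = Align(16);
      return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C,
                                                             Alignment);
    }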
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetPassConfig.h b/linux-x64/clang/include/llvm/CodeGen/TargetPassConfig.h
index 0bd82aa..b478771 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetPassConfig.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetPassConfig.h
@@ -25,6 +25,7 @@
class PassConfigImpl;
class ScheduleDAGInstrs;
class CSEConfigBase;
+class PassInstrumentationCallbacks;
// The old pass manager infrastructure is hidden in a legacy namespace now.
namespace legacy {
@@ -103,6 +104,7 @@
bool Started = true;
bool Stopped = false;
bool AddingMachinePasses = false;
+ bool DebugifyIsSafe = true;
/// Set the StartAfter, StartBefore and StopAfter passes to allow running only
/// a portion of the normal code-gen pass sequence.
@@ -166,8 +168,8 @@
/// If hasLimitedCodeGenPipeline is true, this method
/// returns a string with the name of the options, separated
/// by \p Separator that caused this pipeline to be limited.
- std::string
- getLimitedCodeGenPipelineReason(const char *Separator = "/") const;
+ static std::string
+ getLimitedCodeGenPipelineReason(const char *Separator = "/");
void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); }
@@ -186,7 +188,7 @@
/// Insert InsertedPassID pass after TargetPassID pass.
void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter = true, bool PrintAfter = true);
+ bool VerifyAfter = true);
/// Allow the target to enable a specific standard pass by default.
void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); }
@@ -280,7 +282,7 @@
///
/// This can also be used to plug a new MachineSchedStrategy into an instance
/// of the standard ScheduleDAGMI:
- /// return new ScheduleDAGMI(C, make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false)
+ /// return new ScheduleDAGMI(C, std::make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false)
///
/// Return NULL to select the default (generic) machine scheduler.
virtual ScheduleDAGInstrs *
@@ -306,6 +308,24 @@
/// verification is enabled.
void addVerifyPass(const std::string &Banner);
+ /// Add a pass to add synthesized debug info to the MIR.
+ void addDebugifyPass();
+
+ /// Add a pass to remove debug info from the MIR.
+ void addStripDebugPass();
+
+ /// Add a pass to check synthesized debug info for MIR.
+ void addCheckDebugPass();
+
+ /// Add standard passes before a pass that's about to be added. For example,
+ /// the DebugifyMachineModulePass if it is enabled.
+ void addMachinePrePasses(bool AllowDebugify = true);
+
+ /// Add standard passes after a pass that has just been added. For example,
+ /// the MachineVerifier if it is enabled.
+ void addMachinePostPasses(const std::string &Banner, bool AllowVerify = true,
+ bool AllowStrip = true);
+
/// Check whether or not GlobalISel should abort on error.
/// When this is disabled, GlobalISel will fall back on SDISel instead of
/// erroring out.
@@ -425,32 +445,30 @@
/// Add a CodeGen pass at this point in the pipeline after checking overrides.
/// Return the pass that was added, or zero if no pass was added.
- /// @p printAfter if true and adding a machine function pass add an extra
- /// machine printer pass afterwards
/// @p verifyAfter if true and adding a machine function pass add an extra
/// machine verification pass afterwards.
- AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true,
- bool printAfter = true);
+ AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true);
/// Add a pass to the PassManager if that pass is supposed to be run, as
/// determined by the StartAfter and StopAfter options. Takes ownership of the
/// pass.
- /// @p printAfter if true and adding a machine function pass add an extra
- /// machine printer pass afterwards
/// @p verifyAfter if true and adding a machine function pass add an extra
/// machine verification pass afterwards.
- void addPass(Pass *P, bool verifyAfter = true, bool printAfter = true);
+ void addPass(Pass *P, bool verifyAfter = true);
/// addMachinePasses helper to create the target-selected or overriden
/// regalloc pass.
virtual FunctionPass *createRegAllocPass(bool Optimized);
- /// Add core register alloator passes which do the actual register assignment
+ /// Add core register allocator passes which do the actual register assignment
/// and rewriting. \returns true if any passes were added.
- virtual bool addRegAssignmentFast();
- virtual bool addRegAssignmentOptimized();
+ virtual bool addRegAssignAndRewriteFast();
+ virtual bool addRegAssignAndRewriteOptimized();
};
+void registerCodeGenCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &);
+
} // end namespace llvm
#endif // LLVM_CODEGEN_TARGETPASSCONFIG_H
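A hedged sketch of the narrowed insertPass signature: the PrintAfter flag is
gone, so custom pass configs now request only verification. MyTargetPassConfig
and MyPassID are hypothetical; MachineSchedulerID is the usual anchor from
llvm/CodeGen/Passes.h:

    void MyTargetPassConfig::addMachineSSAOptimization() {
      TargetPassConfig::addMachineSSAOptimization();
      // Insert our pass right after the machine scheduler, with verification.
      insertPass(&MachineSchedulerID, &MyPassID, /*VerifyAfter=*/true);
    }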
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetRegisterInfo.h b/linux-x64/clang/include/llvm/CodeGen/TargetRegisterInfo.h
index 9a3ab47..6f32729 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -34,12 +34,14 @@
namespace llvm {
class BitVector;
+class DIExpression;
class LiveRegMatrix;
class MachineFunction;
class MachineInstr;
class RegScavenger;
class VirtRegMap;
class LiveIntervals;
+class LiveInterval;
class TargetRegisterClass {
public:
@@ -80,19 +82,27 @@
}
/// Return the specified register in the class.
- unsigned getRegister(unsigned i) const {
+ MCRegister getRegister(unsigned i) const {
return MC->getRegister(i);
}
/// Return true if the specified register is included in this register class.
/// This does not include virtual registers.
- bool contains(unsigned Reg) const {
- return MC->contains(Reg);
+ bool contains(Register Reg) const {
+ /// FIXME: Historically this function has returned false when given vregs
+ /// but it should probably only receive physical registers
+ if (!Reg.isPhysical())
+ return false;
+ return MC->contains(Reg.asMCReg());
}
/// Return true if both registers are in this class.
- bool contains(unsigned Reg1, unsigned Reg2) const {
- return MC->contains(Reg1, Reg2);
+ bool contains(Register Reg1, Register Reg2) const {
+ /// FIXME: Historically this function has returned false when given vregs
+ /// but it should probably only receive physical registers
+ if (!Reg1.isPhysical() || !Reg2.isPhysical())
+ return false;
+ return MC->contains(Reg1.asMCReg(), Reg2.asMCReg());
}
/// Return the cost of copying a value between two registers in this class.
@@ -258,57 +268,6 @@
// Further sentinels can be allocated from the small negative integers.
// DenseMapInfo<unsigned> uses -1u and -2u.
- /// isStackSlot - Sometimes it is useful the be able to store a non-negative
- /// frame index in a variable that normally holds a register. isStackSlot()
- /// returns true if Reg is in the range used for stack slots.
- ///
- /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack
- /// slots, so if a variable may contains a stack slot, always check
- /// isStackSlot() first.
- ///
- static bool isStackSlot(unsigned Reg) {
- return int(Reg) >= (1 << 30);
- }
-
- /// Compute the frame index from a register value representing a stack slot.
- static int stackSlot2Index(unsigned Reg) {
- assert(isStackSlot(Reg) && "Not a stack slot");
- return int(Reg - (1u << 30));
- }
-
- /// Convert a non-negative frame index to a stack slot register value.
- static unsigned index2StackSlot(int FI) {
- assert(FI >= 0 && "Cannot hold a negative frame index.");
- return FI + (1u << 30);
- }
-
- /// Return true if the specified register number is in
- /// the physical register namespace.
- static bool isPhysicalRegister(unsigned Reg) {
- assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
- return int(Reg) > 0;
- }
-
- /// Return true if the specified register number is in
- /// the virtual register namespace.
- static bool isVirtualRegister(unsigned Reg) {
- assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
- return int(Reg) < 0;
- }
-
- /// Convert a virtual register number to a 0-based index.
- /// The first virtual register in a function will get the index 0.
- static unsigned virtReg2Index(unsigned Reg) {
- assert(isVirtualRegister(Reg) && "Not a virtual register");
- return Reg & ~(1u << 31);
- }
-
- /// Convert a 0-based index to a virtual register number.
- /// This is the inverse operation of VirtReg2IndexFunctor below.
- static unsigned index2VirtReg(unsigned Index) {
- return Index | (1u << 31);
- }
-
/// Return the size in bits of a register from class RC.
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const {
return getRegClassInfo(RC).RegSize;
@@ -326,6 +285,12 @@
return getRegClassInfo(RC).SpillAlignment / 8;
}
+ /// Return the minimum required alignment in bytes for a spill slot for
+ /// a register of this class.
+ Align getSpillAlign(const TargetRegisterClass &RC) const {
+ return Align(getRegClassInfo(RC).SpillAlignment / 8);
+ }
+
/// Return true if the given TargetRegisterClass has the ValueType T.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const {
for (auto I = legalclasstypes_begin(RC); *I != MVT::Other; ++I)
@@ -350,8 +315,8 @@
/// Returns the Register Class of a physical register of the given type,
/// picking the most specific (smallest) register class of the right type that
/// contains this physreg.
- const TargetRegisterClass *
- getMinimalPhysRegClass(unsigned Reg, MVT VT = MVT::Other) const;
+ const TargetRegisterClass *getMinimalPhysRegClass(MCRegister Reg,
+ MVT VT = MVT::Other) const;
/// Return the maximal subclass of the given register class that is
/// allocatable or NULL.
@@ -366,12 +331,12 @@
/// Return the additional cost of using this register instead
/// of other registers in its class.
- unsigned getCostPerUse(unsigned RegNo) const {
+ unsigned getCostPerUse(MCRegister RegNo) const {
return InfoDesc[RegNo].CostPerUse;
}
/// Return true if the register is in the allocation of any register class.
- bool isInAllocatableClass(unsigned RegNo) const {
+ bool isInAllocatableClass(MCRegister RegNo) const {
return InfoDesc[RegNo].inAllocatableClass;
}
@@ -419,14 +384,14 @@
/// Returns true if the two registers are equal or alias each other.
/// The registers may be virtual registers.
- bool regsOverlap(unsigned regA, unsigned regB) const {
+ bool regsOverlap(Register regA, Register regB) const {
if (regA == regB) return true;
- if (isVirtualRegister(regA) || isVirtualRegister(regB))
+ if (!regA.isPhysical() || !regB.isPhysical())
return false;
// Regunits are numerically ordered. Find a common unit.
- MCRegUnitIterator RUA(regA, this);
- MCRegUnitIterator RUB(regB, this);
+ MCRegUnitIterator RUA(regA.asMCReg(), this);
+ MCRegUnitIterator RUB(regB.asMCReg(), this);
do {
if (*RUA == *RUB) return true;
if (*RUA < *RUB) ++RUA;
@@ -436,9 +401,9 @@
}
/// Returns true if Reg contains RegUnit.
- bool hasRegUnit(unsigned Reg, unsigned RegUnit) const {
+ bool hasRegUnit(MCRegister Reg, Register RegUnit) const {
for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units)
- if (*Units == RegUnit)
+ if (Register(*Units) == RegUnit)
return true;
return false;
}
@@ -447,7 +412,7 @@
/// operation, in which case we chain backwards through all such operations
/// to the ultimate source register. If a physical register is encountered,
/// we stop the search.
- virtual unsigned lookThruCopyLike(unsigned SrcReg,
+ virtual Register lookThruCopyLike(Register SrcReg,
const MachineRegisterInfo *MRI) const;
/// Return a null-terminated list of all of the callee-saved registers on
@@ -484,11 +449,26 @@
return nullptr;
}
+ /// Return a register mask for the registers preserved by the unwinder,
+ /// or nullptr if no custom mask is needed.
+ virtual const uint32_t *
+ getCustomEHPadPreservedMask(const MachineFunction &MF) const {
+ return nullptr;
+ }
+
/// Return a register mask that clobbers everything.
virtual const uint32_t *getNoPreservedMask() const {
llvm_unreachable("target does not provide no preserved mask");
}
+ /// Return a list of all of the registers which are clobbered "inside" a call
+ /// to the given function. For example, these might be needed for PLT
+ /// sequences of long-branch veneers.
+ virtual ArrayRef<MCPhysReg>
+ getIntraCallClobberedRegs(const MachineFunction *MF) const {
+ return {};
+ }
+
/// Return true if all bits that are set in mask \p mask0 are also set in
/// \p mask1.
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;
@@ -512,13 +492,19 @@
/// Returns false if we can't guarantee that Physreg, specified as an IR asm
/// clobber constraint, will be preserved across the statement.
virtual bool isAsmClobberable(const MachineFunction &MF,
- unsigned PhysReg) const {
+ MCRegister PhysReg) const {
return true;
}
+ /// Returns true if PhysReg cannot be written to in inline asm statements.
+ virtual bool isInlineAsmReadOnlyReg(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return false;
+ }
+
/// Returns true if PhysReg is unallocatable and constant throughout the
/// function. Used by MachineRegisterInfo::isConstantPhysReg().
- virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
+ virtual bool isConstantPhysReg(MCRegister PhysReg) const { return false; }
/// Returns true if the register class is considered divergent.
virtual bool isDivergentRegClass(const TargetRegisterClass *RC) const {
@@ -530,8 +516,15 @@
/// have call sequences where a GOT register may be updated by the caller
/// prior to a call and is guaranteed to be restored (also by the caller)
/// after the call.
- virtual bool isCallerPreservedPhysReg(unsigned PhysReg,
- const MachineFunction &MF) const;
+ virtual bool isCallerPreservedPhysReg(MCRegister PhysReg,
+ const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// This is a wrapper around getCallPreservedMask().
+ /// Return true if the register is preserved after the call.
+ virtual bool isCalleeSavedPhysReg(MCRegister PhysReg,
+ const MachineFunction &MF) const;
/// Prior to adding the live-out mask to a stackmap or patchpoint
/// instruction, provide the target the opportunity to adjust it (mainly to
@@ -540,8 +533,8 @@
/// Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
- unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
- const TargetRegisterClass *RC) const {
+ MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx,
+ const TargetRegisterClass *RC) const {
return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC);
}
@@ -625,8 +618,8 @@
}
/// Debugging helper: dump register in human readable form to dbgs() stream.
- static void dumpReg(unsigned Reg, unsigned SubRegIndex = 0,
- const TargetRegisterInfo* TRI = nullptr);
+ static void dumpReg(Register Reg, unsigned SubRegIndex = 0,
+ const TargetRegisterInfo *TRI = nullptr);
protected:
/// Overridden by TableGen in targets that have sub-registers.
@@ -707,13 +700,9 @@
/// Find the largest common subclass of A and B.
/// Return NULL if there is no common subclass.
- /// The common subclass should contain
- /// simple value type SVT if it is not the Any type.
const TargetRegisterClass *
getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- const MVT::SimpleValueType SVT =
- MVT::SimpleValueType::Any) const;
+ const TargetRegisterClass *B) const;
/// Returns a TargetRegisterClass used for pointer values.
/// If a target supports multiple different pointer register classes,
@@ -769,7 +758,7 @@
const TargetRegisterClass *RC) const = 0;
/// Returns size in bits of a phys/virtual/generic register.
- unsigned getRegSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI) const;
+ unsigned getRegSizeInBits(Register Reg, const MachineRegisterInfo &MRI) const;
/// Get the weight in units of pressure for this register unit.
virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0;
@@ -808,20 +797,19 @@
/// independent register allocation hints. Targets that override this
/// function should typically call this default implementation as well and
/// expect to see generic copy hints added.
- virtual bool getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
- SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM = nullptr,
- const LiveRegMatrix *Matrix = nullptr)
- const;
+ virtual bool
+ getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM = nullptr,
+ const LiveRegMatrix *Matrix = nullptr) const;
/// A callback to allow target a chance to update register allocation hints
/// when a register is "changed" (e.g. coalesced) to another register.
/// e.g. On ARM, some virtual registers should target register pairs,
/// if one of pair is coalesced to another register, the allocation hint of
/// the other half of the pair should be changed to point to the new register.
- virtual void updateRegAllocHint(unsigned Reg, unsigned NewReg,
+ virtual void updateRegAllocHint(Register Reg, Register NewReg,
MachineFunction &MF) const {
// Do nothing.
}
@@ -879,14 +867,14 @@
/// spill slot. This tells PEI not to create a new stack frame
/// object for the given register. It should be called only after
/// determineCalleeSaves().
- virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ virtual bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
int &FrameIdx) const {
return false;
}
/// Returns true if the live-ins should be tracked after register allocation.
virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return false;
+ return true;
}
/// True if the stack can be realigned for the target.
@@ -916,7 +904,7 @@
/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
/// before insertion point I.
virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
+ Register BaseReg, int FrameIdx,
int64_t Offset) const {
llvm_unreachable("materializeFrameBaseRegister does not exist on this "
"target");
@@ -924,18 +912,27 @@
/// Resolve a frame index operand of an instruction
/// to reference the indicated base register plus offset instead.
- virtual void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ virtual void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
llvm_unreachable("resolveFrameIndex does not exist on this target");
}
/// Determine whether a given base register plus offset immediate is
/// encodable to resolve a frame index.
- virtual bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ virtual bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const {
llvm_unreachable("isFrameOffsetLegal does not exist on this target");
}
+ /// Gets the DWARF expression opcodes for \p Offset.
+ virtual void getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const;
+
+ /// Prepends a DWARF expression for \p Offset to DIExpression \p Expr.
+ DIExpression *
+ prependOffsetExpression(const DIExpression *Expr, unsigned PrependFlags,
+ const StackOffset &Offset) const;
+
/// Spill the register so it can be used by the register scavenger.
/// Return true if the register was spilled, false otherwise.
/// If this function does not spill the register, the scavenger
@@ -944,7 +941,7 @@
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator &UseMI,
const TargetRegisterClass *RC,
- unsigned Reg) const {
+ Register Reg) const {
return false;
}
@@ -960,7 +957,7 @@
RegScavenger *RS = nullptr) const = 0;
/// Return the assembly name for \p Reg.
- virtual StringRef getRegAsmName(unsigned Reg) const {
+ virtual StringRef getRegAsmName(MCRegister Reg) const {
// FIXME: We are assuming that the assembly name is equal to the TableGen
// name converted to lower case
//
@@ -983,6 +980,42 @@
LiveIntervals &LIS) const
{ return true; }
+ /// Region splitting has a high compile-time cost, especially for large live
+ /// ranges.
+ /// This method is used to decide whether or not \p VirtReg should
+ /// go through this expensive splitting heuristic.
+ virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF,
+ const LiveInterval &VirtReg) const;
+
+ /// Last-chance recoloring has a high compile-time cost, especially for
+ /// targets with a lot of registers.
+ /// This method is used to decide whether or not \p VirtReg should
+ /// go through this expensive heuristic.
+ /// If this hook returns false, there is a high chance that register
+ /// allocation will fail altogether (usually with "ran out of registers").
+ /// That said, this error usually points to another problem in the
+ /// optimization pipeline.
+ virtual bool
+ shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF,
+ const LiveInterval &VirtReg) const {
+ return true;
+ }
+
+ /// Deferred spilling delays the spill insertion of a virtual register
+ /// until after every other allocation. By deferring the spilling, it is
+ /// sometimes possible to eliminate that spilling altogether because
+ /// something else could have been eliminated, thus leaving some space
+ /// for the virtual register.
+ /// However, this comes with a compile time impact because it adds one
+ /// more stage to the greedy register allocator.
+ /// This method is used to decide whether \p VirtReg should use the deferred
+ /// spilling stage instead of being spilled right away.
+ virtual bool
+ shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF,
+ const LiveInterval &VirtReg) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
/// Debug information queries.
@@ -991,7 +1024,7 @@
virtual Register getFrameRegister(const MachineFunction &MF) const = 0;
/// Mark a register and all its aliases as reserved in the given set.
- void markSuperRegs(BitVector &RegisterSet, unsigned Reg) const;
+ void markSuperRegs(BitVector &RegisterSet, MCRegister Reg) const;
/// Returns true if for every register in the set all super registers are part
/// of the set as well.
@@ -1003,6 +1036,13 @@
const MachineRegisterInfo &MRI) const {
return nullptr;
}
+
+ /// Returns the physical register number of sub-register "Index"
+ /// for physical register RegNo. Return zero if the sub-register does not
+ /// exist.
+ inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const {
+ return static_cast<const MCRegisterInfo *>(this)->getSubReg(Reg, Idx);
+ }
};
//===----------------------------------------------------------------------===//
@@ -1152,9 +1192,9 @@
// This is useful when building IndexedMaps keyed on virtual registers
struct VirtReg2IndexFunctor {
- using argument_type = unsigned;
- unsigned operator()(unsigned Reg) const {
- return TargetRegisterInfo::virtReg2Index(Reg);
+ using argument_type = Register;
+ unsigned operator()(Register Reg) const {
+ return Register::virtReg2Index(Reg);
}
};
@@ -1168,7 +1208,7 @@
/// %physreg17 - a physical register when no TRI instance given.
///
/// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n';
-Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr,
+Printable printReg(Register Reg, const TargetRegisterInfo *TRI = nullptr,
unsigned SubIdx = 0,
const MachineRegisterInfo *MRI = nullptr);
@@ -1188,7 +1228,7 @@
/// Create Printable object to print register classes or register banks
/// on a \ref raw_ostream.
-Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo,
+Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI);
} // end namespace llvm
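The register-value helpers deleted above were not dropped outright; they live
on the Register class (llvm/CodeGen/Register.h), as the updated
VirtReg2IndexFunctor shows. A mechanical migration sketch, assuming MO is a
MachineOperand already in scope:

    Register Reg = MO.getReg();
    if (Register::isStackSlot(Reg)) {
      // was: TargetRegisterInfo::stackSlot2Index(Reg)
      int FI = Register::stackSlot2Index(Reg);
      (void)FI;
    } else if (Reg.isVirtual()) {
      // was: TargetRegisterInfo::virtReg2Index(Reg)
      unsigned Idx = Register::virtReg2Index(Reg);
      (void)Idx;
    }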
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetSchedule.h b/linux-x64/clang/include/llvm/CodeGen/TargetSchedule.h
index cce85c8..aa6b82e 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetSchedule.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetSchedule.h
@@ -37,8 +37,12 @@
const TargetInstrInfo *TII = nullptr;
SmallVector<unsigned, 16> ResourceFactors;
- unsigned MicroOpFactor; // Multiply to normalize microops to resource units.
- unsigned ResourceLCM; // Resource units per cycle. Latency normalization factor.
+
+ // Multiply to normalize microops to resource units.
+ unsigned MicroOpFactor = 0;
+
+ // Resource units per cycle. Latency normalization factor.
+ unsigned ResourceLCM = 0;
unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
diff --git a/linux-x64/clang/include/llvm/CodeGen/TargetSubtargetInfo.h b/linux-x64/clang/include/llvm/CodeGen/TargetSubtargetInfo.h
index 037fc3e..3fac2f6 100644
--- a/linux-x64/clang/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -25,10 +25,10 @@
#include <memory>
#include <vector>
-
namespace llvm {
class CallLowering;
+class InlineAsmLowering;
class InstrItineraryData;
struct InstrStage;
class InstructionSelector;
@@ -41,9 +41,6 @@
class RegisterBankInfo;
class SDep;
class SelectionDAGTargetInfo;
-struct SubtargetFeatureKV;
-struct SubtargetSubTypeKV;
-struct SubtargetInfoKV;
class SUnit;
class TargetFrameLowering;
class TargetInstrInfo;
@@ -61,8 +58,8 @@
///
class TargetSubtargetInfo : public MCSubtargetInfo {
protected: // Can only create subclasses...
- TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF,
+ TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, ArrayRef<SubtargetFeatureKV> PF,
ArrayRef<SubtargetSubTypeKV> PD,
const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL,
@@ -102,16 +99,18 @@
}
virtual const CallLowering *getCallLowering() const { return nullptr; }
+ virtual const InlineAsmLowering *getInlineAsmLowering() const {
+ return nullptr;
+ }
+
// FIXME: This lets targets specialize the selector by subtarget (which lets
// us do things like a dedicated avx512 selector). However, we might want
// to also specialize selectors by MachineFunction, which would let us be
// aware of optsize/optnone and such.
- virtual const InstructionSelector *getInstructionSelector() const {
+ virtual InstructionSelector *getInstructionSelector() const {
return nullptr;
}
- virtual unsigned getHwMode() const { return 0; }
-
/// Target can subclass this hook to select a different DAG scheduler.
virtual RegisterScheduler::FunctionPassCtor
getDAGScheduler(CodeGenOpt::Level) const {
@@ -208,6 +207,10 @@
/// which is the preferred way to influence this.
virtual bool enablePostRAScheduler() const;
+ /// True if the subtarget should run a machine scheduler after register
+ /// allocation.
+ virtual bool enablePostRAMachineScheduler() const;
+
/// True if the subtarget should run the atomic expansion pass.
virtual bool enableAtomicExpand() const;
@@ -222,9 +225,13 @@
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {}
- // Perform target specific adjustments to the latency of a schedule
+ // Perform target-specific adjustments to the latency of a schedule
// dependency.
- virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const {}
+ // If a pair of operands is associated with the schedule dependency, DefOpIdx
+ // and UseOpIdx are the indices of the operands in Def and Use, respectively.
+ // Otherwise, either may be -1.
+ virtual void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
+ int UseOpIdx, SDep &Dep) const {}
// For use with PostRAScheduling: get the anti-dependence breaking that should
// be performed before post-RA scheduling.
@@ -274,6 +281,12 @@
/// scheduling, DAGCombine, etc.).
virtual bool useAA() const;
+ /// \brief Sink addresses into blocks using GEP instructions rather than
+ /// pointer casts and arithmetic.
+ virtual bool addrSinkUsingGEPs() const {
+ return useAA();
+ }
+
/// Enable the use of the early if conversion pass.
virtual bool enableEarlyIfConversion() const { return false; }
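
A hypothetical subtarget showing how the revised hooks fit together
(constructor plumbing omitted; MySubtarget and the +1 latency tweak are
illustrative, and llvm/CodeGen/ScheduleDAG.h is assumed to be included):

    class MySubtarget : public llvm::TargetSubtargetInfo {
    public:
      // Opt in to running the machine scheduler after register allocation.
      bool enablePostRAMachineScheduler() const override { return true; }

      // New signature: DefOpIdx/UseOpIdx identify the operand pair behind
      // the dependency, or are -1 when no such pair is associated.
      void adjustSchedDependency(llvm::SUnit *Def, int DefOpIdx,
                                 llvm::SUnit *Use, int UseOpIdx,
                                 llvm::SDep &Dep) const override {
        if (DefOpIdx >= 0 && UseOpIdx >= 0 &&
            Dep.getKind() == llvm::SDep::Data)
          Dep.setLatency(Dep.getLatency() + 1); // e.g. model a bypass stall
      }
    };
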
diff --git a/linux-x64/clang/include/llvm/CodeGen/TileShapeInfo.h b/linux-x64/clang/include/llvm/CodeGen/TileShapeInfo.h
new file mode 100644
index 0000000..031d235
--- /dev/null
+++ b/linux-x64/clang/include/llvm/CodeGen/TileShapeInfo.h
@@ -0,0 +1,97 @@
+//===- llvm/CodeGen/TileShapeInfo.h - ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Shape utility for AMX.
+/// AMX hardware requires the shape of a tile data register to be configured
+/// before use. The 2D shape consists of a row and a column. In the AMX
+/// intrinsics interface the shape is passed as the 1st and 2nd parameters,
+/// which are lowered to the 1st and 2nd machine operands of the AMX pseudo
+/// instructions. The ShapeT class facilitates tile configuration and
+/// register allocation; the row and column are machine operands of the AMX
+/// pseudo instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_TILESHAPEINFO_H
+#define LLVM_CODEGEN_TILESHAPEINFO_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include <utility>
+
+namespace llvm {
+
+class ShapeT {
+public:
+ ShapeT(MachineOperand *Row, MachineOperand *Col,
+ const MachineRegisterInfo *MRI = nullptr)
+ : Row(Row), Col(Col) {
+ if (MRI)
+ deduceImm(MRI);
+ }
+ ShapeT()
+ : Row(nullptr), Col(nullptr), RowImm(InvalidImmShape),
+ ColImm(InvalidImmShape) {}
+ bool operator==(const ShapeT &Shape) {
+ MachineOperand *R = Shape.Row;
+ MachineOperand *C = Shape.Col;
+ if (!R || !C)
+ return false;
+ if (!Row || !Col)
+ return false;
+ if (Row->getReg() == R->getReg() && Col->getReg() == C->getReg())
+ return true;
+ if ((RowImm != InvalidImmShape) && (ColImm != InvalidImmShape))
+ return RowImm == Shape.getRowImm() && ColImm == Shape.getColImm();
+ return false;
+ }
+
+ bool operator!=(const ShapeT &Shape) { return !(*this == Shape); }
+
+ MachineOperand *getRow() const { return Row; }
+
+ MachineOperand *getCol() const { return Col; }
+
+ int64_t getRowImm() const { return RowImm; }
+
+ int64_t getColImm() const { return ColImm; }
+
+ bool isValid() { return (Row != nullptr) && (Col != nullptr); }
+
+ void deduceImm(const MachineRegisterInfo *MRI) {
+ // All defs must be the same value; otherwise the MIs are invalid.
+ // Find the immediate.
+ // TODO: handle copy propagation.
+ auto GetImm = [&](Register Reg) {
+ int64_t Imm = InvalidImmShape;
+ for (const MachineOperand &DefMO : MRI->def_operands(Reg)) {
+ const auto *MI = DefMO.getParent();
+ if (MI->isMoveImmediate()) {
+ Imm = MI->getOperand(1).getImm();
+ break;
+ }
+ }
+ return Imm;
+ };
+ RowImm = GetImm(Row->getReg());
+ ColImm = GetImm(Col->getReg());
+ }
+
+private:
+ static constexpr int64_t InvalidImmShape = -1;
+ MachineOperand *Row;
+ MachineOperand *Col;
+ int64_t RowImm;
+ int64_t ColImm;
+};
+
+} // namespace llvm
+
+#endif
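
A minimal usage sketch for ShapeT (MI and MRI come from the surrounding pass;
the operand indices follow the file comment above, and configureTile() is a
hypothetical consumer):

    // MI is an AMX pseudo whose 1st/2nd machine operands carry the shape.
    llvm::MachineOperand &Row = MI.getOperand(1);
    llvm::MachineOperand &Col = MI.getOperand(2);
    llvm::ShapeT Shape(&Row, &Col, &MRI); // passing MRI runs deduceImm()

    if (Shape.isValid() && Shape.getRowImm() != -1 && Shape.getColImm() != -1)
      // Both dimensions were defined by move-immediates, so the tile shape
      // is a compile-time constant.
      configureTile(Shape.getRowImm(), Shape.getColImm()); // hypothetical
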
diff --git a/linux-x64/clang/include/llvm/CodeGen/ValueTypes.h b/linux-x64/clang/include/llvm/CodeGen/ValueTypes.h
index c540c94..888b83d 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ValueTypes.h
+++ b/linux-x64/clang/include/llvm/CodeGen/ValueTypes.h
@@ -18,6 +18,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/WithColor.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -74,38 +76,33 @@
MVT M = MVT::getVectorVT(VT.V, NumElements, IsScalable);
if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
-
- assert(!IsScalable && "We don't support extended scalable types yet");
- return getExtendedVectorVT(Context, VT, NumElements);
+ return getExtendedVectorVT(Context, VT, NumElements, IsScalable);
}
/// Returns the EVT that represents a vector of EC.Min elements, where
/// each element is of type VT.
- static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) {
+ static EVT getVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) {
MVT M = MVT::getVectorVT(VT.V, EC);
if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
- assert (!EC.Scalable && "We don't support extended scalable types yet");
- return getExtendedVectorVT(Context, VT, EC.Min);
+ return getExtendedVectorVT(Context, VT, EC);
}
/// Return a vector with the same number of elements as this vector, but
/// with the element type converted to an integer type with the same
/// bitwidth.
EVT changeVectorElementTypeToInteger() const {
- if (!isSimple()) {
- assert (!isScalableVector() &&
- "We don't support extended scalable types yet");
- return changeExtendedVectorElementTypeToInteger();
- }
- MVT EltTy = getSimpleVT().getVectorElementType();
- unsigned BitWidth = EltTy.getSizeInBits();
- MVT IntTy = MVT::getIntegerVT(BitWidth);
- MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements(),
- isScalableVector());
- assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
- "Simple vector VT not representable by simple integer vector VT!");
- return VecTy;
+ if (isSimple())
+ return getSimpleVT().changeVectorElementTypeToInteger();
+ return changeExtendedVectorElementTypeToInteger();
+ }
+
+ /// Return a VT for a vector type whose attributes match ourselves
+ /// with the exception of the element type that is chosen by the caller.
+ EVT changeVectorElementType(EVT EltVT) const {
+ if (isSimple() && EltVT.isSimple())
+ return getSimpleVT().changeVectorElementType(EltVT.getSimpleVT());
+ return changeExtendedVectorElementType(EltVT);
}
/// Return the type converted to an equivalently sized integer or vector
@@ -116,8 +113,7 @@
return changeVectorElementTypeToInteger();
if (isSimple())
- return MVT::getIntegerVT(getSizeInBits());
-
+ return getSimpleVT().changeTypeToInteger();
return changeExtendedTypeToInteger();
}
@@ -154,12 +150,12 @@
/// Return true if this is a vector type where the runtime
/// length is machine dependent
bool isScalableVector() const {
- // FIXME: We don't support extended scalable types yet, because the
- // matching IR type doesn't exist. Once it has been added, this can
- // be changed to call isExtendedScalableVector.
- if (!isSimple())
- return false;
- return V.isScalableVector();
+ return isSimple() ? V.isScalableVector() : isExtendedScalableVector();
+ }
+
+ bool isFixedLengthVector() const {
+ return isSimple() ? V.isFixedLengthVector()
+ : isExtendedFixedLengthVector();
}
/// Return true if this is a 16-bit vector type.
@@ -208,12 +204,12 @@
}
/// Return true if the bit size is a multiple of 8.
- bool isByteSized() const {
- return (getSizeInBits() & 7) == 0;
- }
+ bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); }
/// Return true if the size is a power-of-two number of bytes.
bool isRound() const {
+ if (isScalableVector())
+ return false;
unsigned BitSize = getSizeInBits();
return BitSize >= 8 && !(BitSize & (BitSize - 1));
}
@@ -224,28 +220,58 @@
return getSizeInBits() == VT.getSizeInBits();
}
+ /// Return true if we know at compile time this has more bits than VT.
+ bool knownBitsGT(EVT VT) const {
+ return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has at least as many bits
+ /// as VT.
+ bool knownBitsGE(EVT VT) const {
+ return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has fewer bits than VT.
+ bool knownBitsLT(EVT VT) const {
+ return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has at most as many bits
+ /// as VT.
+ bool knownBitsLE(EVT VT) const {
+ return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits());
+ }
+
/// Return true if this has more bits than VT.
bool bitsGT(EVT VT) const {
if (EVT::operator==(VT)) return false;
- return getSizeInBits() > VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGT(VT);
}
/// Return true if this has no fewer bits than VT.
bool bitsGE(EVT VT) const {
if (EVT::operator==(VT)) return true;
- return getSizeInBits() >= VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGE(VT);
}
/// Return true if this has fewer bits than VT.
bool bitsLT(EVT VT) const {
if (EVT::operator==(VT)) return false;
- return getSizeInBits() < VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLT(VT);
}
/// Return true if this has no more bits than VT.
bool bitsLE(EVT VT) const {
if (EVT::operator==(VT)) return true;
- return getSizeInBits() <= VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLE(VT);
}
/// Return the SimpleValueType held in the specified simple EVT.
@@ -270,43 +296,74 @@
/// Given a vector type, return the number of elements it contains.
unsigned getVectorNumElements() const {
+#ifdef STRICT_FIXED_SIZE_VECTORS
+ assert(isFixedLengthVector() && "Invalid vector type!");
+#else
assert(isVector() && "Invalid vector type!");
+ if (isScalableVector())
+ WithColor::warning()
+ << "Possible incorrect use of EVT::getVectorNumElements() for "
+ "scalable vector. Scalable flag may be dropped, use "
+ "EVT::getVectorElementCount() instead\n";
+#endif
if (isSimple())
return V.getVectorNumElements();
return getExtendedVectorNumElements();
}
// Given a (possibly scalable) vector type, return the ElementCount
- MVT::ElementCount getVectorElementCount() const {
+ ElementCount getVectorElementCount() const {
assert((isVector()) && "Invalid vector type!");
if (isSimple())
return V.getVectorElementCount();
- assert(!isScalableVector() &&
- "We don't support extended scalable types yet");
- return {getExtendedVectorNumElements(), false};
+ return getExtendedVectorElementCount();
+ }
+
+ /// Given a vector type, return the minimum number of elements it contains.
+ unsigned getVectorMinNumElements() const {
+ return getVectorElementCount().getKnownMinValue();
}
/// Return the size of the specified value type in bits.
- unsigned getSizeInBits() const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getSizeInBits() const {
if (isSimple())
return V.getSizeInBits();
return getExtendedSizeInBits();
}
- unsigned getScalarSizeInBits() const {
- return getScalarType().getSizeInBits();
+ /// Return the size of the specified fixed width value type in bits. The
+ /// function will assert if the type is scalable.
+ uint64_t getFixedSizeInBits() const {
+ return getSizeInBits().getFixedSize();
+ }
+
+ uint64_t getScalarSizeInBits() const {
+ return getScalarType().getSizeInBits().getFixedSize();
}
/// Return the number of bytes overwritten by a store of the specified value
/// type.
- unsigned getStoreSize() const {
- return (getSizeInBits() + 7) / 8;
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSize() const {
+ TypeSize BaseSize = getSizeInBits();
+ return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()};
}
/// Return the number of bits overwritten by a store of the specified value
/// type.
- unsigned getStoreSizeInBits() const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSizeInBits() const {
return getStoreSize() * 8;
}
@@ -350,13 +407,22 @@
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const {
EVT EltVT = getVectorElementType();
auto EltCnt = getVectorElementCount();
- assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!");
- return EVT::getVectorVT(Context, EltVT, EltCnt / 2);
+ assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
+ return EVT::getVectorVT(Context, EltVT, EltCnt.divideCoefficientBy(2));
+ }
+
+ // Return a VT for a vector type with the same element type but
+ // double the number of elements. The type returned may be an
+ // extended type.
+ EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const {
+ EVT EltVT = getVectorElementType();
+ auto EltCnt = getVectorElementCount();
+ return EVT::getVectorVT(Context, EltVT, EltCnt * 2);
}
/// Returns true if the given vector is a power of 2.
bool isPow2VectorType() const {
- unsigned NElts = getVectorNumElements();
+ unsigned NElts = getVectorMinNumElements();
return !(NElts & (NElts - 1));
}
@@ -364,10 +430,10 @@
/// and returns that type.
EVT getPow2VectorType(LLVMContext &Context) const {
if (!isPow2VectorType()) {
- unsigned NElts = getVectorNumElements();
- unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
- return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts,
- isScalableVector());
+ ElementCount NElts = getVectorElementCount();
+ unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue());
+ NElts = ElementCount::get(NewMinCount, NElts.isScalable());
+ return EVT::getVectorVT(Context, getVectorElementType(), NElts);
}
else {
return *this;
@@ -410,10 +476,13 @@
// These are all out-of-line to prevent users of this header file
// from having a dependency on Type.h.
EVT changeExtendedTypeToInteger() const;
+ EVT changeExtendedVectorElementType(EVT EltVT) const;
EVT changeExtendedVectorElementTypeToInteger() const;
static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
- static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
- unsigned NumElements);
+ static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, unsigned NumElements,
+ bool IsScalable);
+ static EVT getExtendedVectorVT(LLVMContext &Context, EVT VT,
+ ElementCount EC);
bool isExtendedFloatingPoint() const LLVM_READONLY;
bool isExtendedInteger() const LLVM_READONLY;
bool isExtendedScalarInteger() const LLVM_READONLY;
@@ -426,9 +495,12 @@
bool isExtended512BitVector() const LLVM_READONLY;
bool isExtended1024BitVector() const LLVM_READONLY;
bool isExtended2048BitVector() const LLVM_READONLY;
+ bool isExtendedFixedLengthVector() const LLVM_READONLY;
+ bool isExtendedScalableVector() const LLVM_READONLY;
EVT getExtendedVectorElementType() const;
unsigned getExtendedVectorNumElements() const LLVM_READONLY;
- unsigned getExtendedSizeInBits() const LLVM_READONLY;
+ ElementCount getExtendedVectorElementCount() const LLVM_READONLY;
+ TypeSize getExtendedSizeInBits() const LLVM_READONLY;
};
} // end namespace llvm
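
Taken together, these EVT changes make size queries scalable-aware. A short
sketch (Ctx is any LLVMContext):

    using namespace llvm;
    EVT Fixed = EVT::getVectorVT(Ctx, MVT::i32, 4);              // v4i32
    EVT Scal = EVT::getVectorVT(Ctx, MVT::i32,
                                ElementCount::getScalable(4));   // nxv4i32

    TypeSize FS = Fixed.getSizeInBits(); // fixed 128 bits
    TypeSize SS = Scal.getSizeInBits();  // vscale x 128 bits
    assert(FS.getFixedSize() == 128 && SS.isScalable());

    // bitsGT/bitsLT now assert that both sides are fixed or both scalable;
    // mixed comparisons should use the knownBits* predicates instead.
    EVT Half = Scal.getHalfNumVectorElementsVT(Ctx);             // nxv2i32
    assert(Scal.knownBitsGT(Half)); // holds for every value of vscale
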
diff --git a/linux-x64/clang/include/llvm/CodeGen/ValueTypes.td b/linux-x64/clang/include/llvm/CodeGen/ValueTypes.td
index feea7e5..d13d0a7 100644
--- a/linux-x64/clang/include/llvm/CodeGen/ValueTypes.td
+++ b/linux-x64/clang/include/llvm/CodeGen/ValueTypes.td
@@ -25,142 +25,179 @@
def i32 : ValueType<32 , 5>; // 32-bit integer value
def i64 : ValueType<64 , 6>; // 64-bit integer value
def i128 : ValueType<128, 7>; // 128-bit integer value
-def f16 : ValueType<16 , 8>; // 16-bit floating point value
-def f32 : ValueType<32 , 9>; // 32-bit floating point value
-def f64 : ValueType<64 , 10>; // 64-bit floating point value
-def f80 : ValueType<80 , 11>; // 80-bit floating point value
-def f128 : ValueType<128, 12>; // 128-bit floating point value
-def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value
-def v1i1 : ValueType<1 , 14>; // 1 x i1 vector value
-def v2i1 : ValueType<2 , 15>; // 2 x i1 vector value
-def v4i1 : ValueType<4 , 16>; // 4 x i1 vector value
-def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value
-def v16i1 : ValueType<16, 18>; // 16 x i1 vector value
-def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value
-def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value
-def v128i1 : ValueType<128, 21>; // 128 x i1 vector value
-def v512i1 : ValueType<512, 22>; // 512 x i1 vector value
-def v1024i1: ValueType<1024,23>; //1024 x i1 vector value
+def bf16 : ValueType<16 , 8>; // 16-bit brain floating point value
+def f16 : ValueType<16 , 9>; // 16-bit floating point value
+def f32 : ValueType<32 , 10>; // 32-bit floating point value
+def f64 : ValueType<64 , 11>; // 64-bit floating point value
+def f80 : ValueType<80 , 12>; // 80-bit floating point value
+def f128 : ValueType<128, 13>; // 128-bit floating point value
+def ppcf128: ValueType<128, 14>; // PPC 128-bit floating point value
-def v1i8 : ValueType<8, 24>; // 1 x i8 vector value
-def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 28>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 29>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 30>; // 64 x i8 vector value
-def v128i8 : ValueType<1024,31>; //128 x i8 vector value
-def v256i8 : ValueType<2048,32>; //256 x i8 vector value
+def v1i1 : ValueType<1 , 15>; // 1 x i1 vector value
+def v2i1 : ValueType<2 , 16>; // 2 x i1 vector value
+def v4i1 : ValueType<4 , 17>; // 4 x i1 vector value
+def v8i1 : ValueType<8 , 18>; // 8 x i1 vector value
+def v16i1 : ValueType<16, 19>; // 16 x i1 vector value
+def v32i1 : ValueType<32 , 20>; // 32 x i1 vector value
+def v64i1 : ValueType<64 , 21>; // 64 x i1 vector value
+def v128i1 : ValueType<128, 22>; // 128 x i1 vector value
+def v256i1 : ValueType<256, 23>; // 256 x i1 vector value
+def v512i1 : ValueType<512, 24>; // 512 x i1 vector value
+def v1024i1: ValueType<1024,25>; //1024 x i1 vector value
-def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value
-def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value
-def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 36>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 37>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 38>; // 32 x i16 vector value
-def v64i16 : ValueType<1024,39>; // 64 x i16 vector value
-def v128i16: ValueType<2048,40>; //128 x i16 vector value
+def v1i8 : ValueType<8, 26>; // 1 x i8 vector value
+def v2i8 : ValueType<16 , 27>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 28>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 29>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 30>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 31>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 32>; // 64 x i8 vector value
+def v128i8 : ValueType<1024,33>; //128 x i8 vector value
+def v256i8 : ValueType<2048,34>; //256 x i8 vector value
-def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value
-def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value
-def v4i32 : ValueType<128, 44>; // 4 x i32 vector value
-def v5i32 : ValueType<160, 45>; // 5 x i32 vector value
-def v8i32 : ValueType<256, 46>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 47>; // 16 x i32 vector value
-def v32i32 : ValueType<1024,48>; // 32 x i32 vector value
-def v64i32 : ValueType<2048,49>; // 64 x i32 vector value
-def v128i32 : ValueType<4096,50>; // 128 x i32 vector value
-def v256i32 : ValueType<8182,51>; // 256 x i32 vector value
-def v512i32 : ValueType<16384,52>; // 512 x i32 vector value
-def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value
-def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value
+def v1i16 : ValueType<16 , 35>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 36>; // 2 x i16 vector value
+def v3i16 : ValueType<48 , 37>; // 3 x i16 vector value
+def v4i16 : ValueType<64 , 38>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 39>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 40>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 41>; // 32 x i16 vector value
+def v64i16 : ValueType<1024,42>; // 64 x i16 vector value
+def v128i16: ValueType<2048,43>; //128 x i16 vector value
-def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 56>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 57>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 58>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,59>; // 16 x i64 vector value
-def v32i64 : ValueType<2048,60>; // 32 x i64 vector value
+def v1i32 : ValueType<32 , 44>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 45>; // 2 x i32 vector value
+def v3i32 : ValueType<96 , 46>; // 3 x i32 vector value
+def v4i32 : ValueType<128, 47>; // 4 x i32 vector value
+def v5i32 : ValueType<160, 48>; // 5 x i32 vector value
+def v8i32 : ValueType<256, 49>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 50>; // 16 x i32 vector value
+def v32i32 : ValueType<1024,51>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,52>; // 64 x i32 vector value
+def v128i32 : ValueType<4096,53>; // 128 x i32 vector value
+def v256i32 : ValueType<8192,54>; // 256 x i32 vector value
+def v512i32 : ValueType<16384,55>; // 512 x i32 vector value
+def v1024i32 : ValueType<32768,56>; // 1024 x i32 vector value
+def v2048i32 : ValueType<65536,57>; // 2048 x i32 vector value
-def v1i128 : ValueType<128, 61>; // 1 x i128 vector value
+def v1i64 : ValueType<64 , 58>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 59>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 60>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 61>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,62>; // 16 x i64 vector value
+def v32i64 : ValueType<2048,63>; // 32 x i64 vector value
+def v64i64 : ValueType<4096,64>; // 64 x i64 vector value
+def v128i64: ValueType<8192,65>; // 128 x i64 vector value
+def v256i64: ValueType<16384,66>; // 256 x i64 vector value
-def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 63>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 65>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value
+def v1i128 : ValueType<128, 67>; // 1 x i128 vector value
-def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value
+def v2f16 : ValueType<32 , 68>; // 2 x f16 vector value
+def v3f16 : ValueType<48 , 69>; // 3 x f16 vector value
+def v4f16 : ValueType<64 , 70>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 71>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 72>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 73>; // 32 x f16 vector value
+def v64f16 : ValueType<1024, 74>; // 64 x f16 vector value
+def v128f16 : ValueType<2048, 75>; // 128 x f16 vector value
+def v2bf16 : ValueType<32 , 76>; // 2 x bf16 vector value
+def v3bf16 : ValueType<48 , 77>; // 3 x bf16 vector value
+def v4bf16 : ValueType<64 , 78>; // 4 x bf16 vector value
+def v8bf16 : ValueType<128, 79>; // 8 x bf16 vector value
+def v16bf16 : ValueType<256, 80>; // 16 x bf16 vector value
+def v32bf16 : ValueType<512, 81>; // 32 x bf16 vector value
+def v64bf16 : ValueType<1024, 82>; // 64 x bf16 vector value
+def v128bf16 : ValueType<2048, 83>; // 128 x bf16 vector value
+def v1f32 : ValueType<32 , 84>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 85>; // 2 x f32 vector value
+def v3f32 : ValueType<96 , 86>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 87>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 88>; // 5 x f32 vector value
+def v8f32 : ValueType<256, 89>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 90>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 91>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 92>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 93>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 94>; // 256 x f32 vector value
+def v512f32 : ValueType<16384, 95>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 96>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 97>; // 2048 x f32 vector value
+def v1f64 : ValueType<64, 98>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 99>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 100>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 101>; // 8 x f64 vector value
+def v16f64 : ValueType<1024, 102>; // 16 x f64 vector value
+def v32f64 : ValueType<2048, 103>; // 32 x f64 vector value
+def v64f64 : ValueType<4096, 104>; // 64 x f64 vector value
+def v128f64 : ValueType<8192, 105>; // 128 x f64 vector value
+def v256f64 : ValueType<16384, 106>; // 256 x f64 vector value
-def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value
-def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value
-def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value
+def nxv1i1 : ValueType<1, 107>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 108>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 109>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 110>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 111>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 112>; // n x 32 x i1 vector value
+def nxv64i1 : ValueType<64,113>; // n x 64 x i1 vector value
-def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value
-def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value
-def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value
+def nxv1i8 : ValueType<8, 114>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 115>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 116>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 117>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 118>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 119>; // n x 32 x i8 vector value
+def nxv64i8 : ValueType<512, 120>; // n x 64 x i8 vector value
-def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value
-def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value
-def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value
+def nxv1i16 : ValueType<16, 121>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 122>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 123>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 124>; // n x 8 x i16 vector value
+def nxv16i16: ValueType<256, 125>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 126>; // n x 32 x i16 vector value
-def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value
-def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 94>; // 8 x f16 vector value
-def v1f32 : ValueType<32 , 95>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value
-def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 98>; // 4 x f32 vector value
-def v5f32 : ValueType<160, 99>; // 5 x f32 vector value
-def v8f32 : ValueType<256, 100>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 101>; // 16 x f32 vector value
-def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value
-def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value
-def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value
-def v256f32 : ValueType<8182, 105>; // 256 x f32 vector value
-def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value
-def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value
-def v1f64 : ValueType<64, 109>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 110>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 111>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 112>; // 8 x f64 vector value
+def nxv1i32 : ValueType<32, 127>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 128>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 129>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 130>; // n x 8 x i32 vector value
+def nxv16i32: ValueType<512, 131>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,132>; // n x 32 x i32 vector value
-def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value
-def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value
-def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value
+def nxv1i64 : ValueType<64, 133>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 134>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 135>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 136>; // n x 8 x i64 vector value
+def nxv16i64: ValueType<1024,137>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,138>; // n x 32 x i64 vector value
-def x86mmx : ValueType<64 , 125>; // X86 MMX value
-def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 127>; // Produces no value
-def untyped: ValueType<8 , 128>; // Produces an untyped value
-def ExceptRef: ValueType<0, 129>; // WebAssembly's except_ref type
+def nxv1f16 : ValueType<16, 139>; // n x 1 x f16 vector value
+def nxv2f16 : ValueType<32 , 140>; // n x 2 x f16 vector value
+def nxv4f16 : ValueType<64 , 141>; // n x 4 x f16 vector value
+def nxv8f16 : ValueType<128, 142>; // n x 8 x f16 vector value
+def nxv16f16 : ValueType<256,143>; // n x 16 x f16 vector value
+def nxv32f16 : ValueType<512,144>; // n x 32 x f16 vector value
+def nxv2bf16 : ValueType<32 , 145>; // n x 2 x bf16 vector value
+def nxv4bf16 : ValueType<64 , 146>; // n x 4 x bf16 vector value
+def nxv8bf16 : ValueType<128, 147>; // n x 8 x bf16 vector value
+def nxv1f32 : ValueType<32 , 148>; // n x 1 x f32 vector value
+def nxv2f32 : ValueType<64 , 149>; // n x 2 x f32 vector value
+def nxv4f32 : ValueType<128, 150>; // n x 4 x f32 vector value
+def nxv8f32 : ValueType<256, 151>; // n x 8 x f32 vector value
+def nxv16f32 : ValueType<512, 152>; // n x 16 x f32 vector value
+def nxv1f64 : ValueType<64, 153>; // n x 1 x f64 vector value
+def nxv2f64 : ValueType<128, 154>; // n x 2 x f64 vector value
+def nxv4f64 : ValueType<256, 155>; // n x 4 x f64 vector value
+def nxv8f64 : ValueType<512, 156>; // n x 8 x f64 vector value
+
+def x86mmx : ValueType<64 , 157>; // X86 MMX value
+def FlagVT : ValueType<0 , 158>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 159>; // Produces no value
+def untyped: ValueType<8 , 160>; // Produces an untyped value
+def funcref : ValueType<0 , 161>; // WebAssembly's funcref type
+def externref : ValueType<0 , 162>; // WebAssembly's externref type
+def x86amx : ValueType<8192, 163>; // X86 AMX value
+
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
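
The renumbering above shifts most SimpleValueType encodings, so client code
should always go through the MVT enumerators rather than raw numbers; for
example (illustrative only):

    using namespace llvm;
    EVT BF16 = MVT::bf16;   // 16-bit brain-float scalar added above
    EVT AMX = MVT::x86amx;  // 8192-bit AMX tile value added above
    assert(MVT(MVT::v256i1).getVectorNumElements() == 256);
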
diff --git a/linux-x64/clang/include/llvm/CodeGen/VirtRegMap.h b/linux-x64/clang/include/llvm/CodeGen/VirtRegMap.h
index 7a64d67..deef4b9 100644
--- a/linux-x64/clang/include/llvm/CodeGen/VirtRegMap.h
+++ b/linux-x64/clang/include/llvm/CodeGen/VirtRegMap.h
@@ -19,7 +19,7 @@
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/Pass.h"
#include <cassert>
@@ -49,7 +49,7 @@
/// it; even spilled virtual registers (the register mapped to a
/// spilled register is the temporary used to load it from the
/// stack).
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+ IndexedMap<Register, VirtReg2IndexFunctor> Virt2PhysMap;
/// Virt2StackSlotMap - This is virtual register to stack slot
/// mapping. Each spilled virtual register has an entry in it
@@ -61,14 +61,20 @@
/// mapping.
IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+ /// Virt2ShapeMap - For X86 AMX, maps a virtual register to the shape
+ /// information bound to it.
+ DenseMap<unsigned, ShapeT> Virt2ShapeMap;
+
/// createSpillSlot - Allocate a spill slot for RC from MFI.
unsigned createSpillSlot(const TargetRegisterClass *RC);
public:
static char ID;
- VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
- Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
+ VirtRegMap()
+ : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr),
+ MF(nullptr), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
VirtRegMap(const VirtRegMap &) = delete;
VirtRegMap &operator=(const VirtRegMap &) = delete;
@@ -91,28 +97,43 @@
/// returns true if the specified virtual register is
/// mapped to a physical register
- bool hasPhys(unsigned virtReg) const {
+ bool hasPhys(Register virtReg) const {
return getPhys(virtReg) != NO_PHYS_REG;
}
/// returns the physical register mapped to the specified
/// virtual register
- Register getPhys(Register virtReg) const {
+ MCRegister getPhys(Register virtReg) const {
assert(virtReg.isVirtual());
- return Virt2PhysMap[virtReg];
+ return MCRegister::from(Virt2PhysMap[virtReg.id()]);
}
/// creates a mapping for the specified virtual register to
/// the specified physical register
- void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg);
+ void assignVirt2Phys(Register virtReg, MCPhysReg physReg);
+
+ bool isShapeMapEmpty() const { return Virt2ShapeMap.empty(); }
+
+ bool hasShape(Register virtReg) const {
+ return getShape(virtReg).isValid();
+ }
+
+ ShapeT getShape(Register virtReg) const {
+ assert(virtReg.isVirtual());
+ return Virt2ShapeMap.lookup(virtReg);
+ }
+
+ void assignVirt2Shape(Register virtReg, ShapeT shape) {
+ Virt2ShapeMap[virtReg.id()] = shape;
+ }
/// clears the specified virtual register's, physical
/// register mapping
- void clearVirt(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ void clearVirt(Register virtReg) {
+ assert(virtReg.isVirtual());
+ assert(Virt2PhysMap[virtReg.id()] != NO_PHYS_REG &&
"attempt to clear a not assigned virtual register");
- Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ Virt2PhysMap[virtReg.id()] = NO_PHYS_REG;
}
/// clears all virtual to physical register mappings
@@ -122,56 +143,60 @@
}
/// returns true if VirtReg is assigned to its preferred physreg.
- bool hasPreferredPhys(unsigned VirtReg);
+ bool hasPreferredPhys(Register VirtReg);
/// returns true if VirtReg has a known preferred register.
/// This returns false if VirtReg has a preference that is a virtual
/// register that hasn't been assigned yet.
- bool hasKnownPreference(unsigned VirtReg);
+ bool hasKnownPreference(Register VirtReg);
/// records virtReg is a split live interval from SReg.
- void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
- Virt2SplitMap[virtReg] = SReg;
+ void setIsSplitFromReg(Register virtReg, Register SReg) {
+ Virt2SplitMap[virtReg.id()] = SReg;
+ if (hasShape(SReg)) {
+ Virt2ShapeMap[virtReg.id()] = getShape(SReg);
+ }
}
/// returns the live interval virtReg is split from.
- unsigned getPreSplitReg(unsigned virtReg) const {
- return Virt2SplitMap[virtReg];
+ Register getPreSplitReg(Register virtReg) const {
+ return Virt2SplitMap[virtReg.id()];
}
/// getOriginal - Return the original virtual register that VirtReg descends
/// from through splitting.
/// A register that was not created by splitting is its own original.
/// This operation is idempotent.
- unsigned getOriginal(unsigned VirtReg) const {
- unsigned Orig = getPreSplitReg(VirtReg);
+ Register getOriginal(Register VirtReg) const {
+ Register Orig = getPreSplitReg(VirtReg);
return Orig ? Orig : VirtReg;
}
/// returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
- bool isAssignedReg(unsigned virtReg) const {
+ bool isAssignedReg(Register virtReg) const {
if (getStackSlot(virtReg) == NO_STACK_SLOT)
return true;
// Split register can be assigned a physical register as well as a
// stack slot or remat id.
- return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ return (Virt2SplitMap[virtReg.id()] &&
+ Virt2PhysMap[virtReg.id()] != NO_PHYS_REG);
}
/// returns the stack slot mapped to the specified virtual
/// register
- int getStackSlot(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2StackSlotMap[virtReg];
+ int getStackSlot(Register virtReg) const {
+ assert(virtReg.isVirtual());
+ return Virt2StackSlotMap[virtReg.id()];
}
/// create a mapping for the specified virtual register to
/// the next available stack slot
- int assignVirt2StackSlot(unsigned virtReg);
+ int assignVirt2StackSlot(Register virtReg);
/// create a mapping for the specified virtual register to
/// the specified stack slot
- void assignVirt2StackSlot(unsigned virtReg, int SS);
+ void assignVirt2StackSlot(Register virtReg, int SS);
void print(raw_ostream &OS, const Module* M = nullptr) const override;
void dump() const;
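
A sketch of how an allocator might consult the new shape plumbing (VRM and
Reg are assumed to come from the enclosing pass):

    // Reg is a virtual register that may carry an AMX tile shape.
    if (VRM.hasShape(Reg)) {
      llvm::ShapeT S = VRM.getShape(Reg);
      // getPhys() now yields an MCRegister rather than a plain unsigned.
      llvm::MCRegister Phys = VRM.getPhys(Reg);
      (void)S; (void)Phys;
    }
    // Note: setIsSplitFromReg() copies the shape from the source register,
    // so split live intervals keep their tile shape.
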
diff --git a/linux-x64/clang/include/llvm/CodeGen/WasmEHFuncInfo.h b/linux-x64/clang/include/llvm/CodeGen/WasmEHFuncInfo.h
index 887a146..54e8c40 100644
--- a/linux-x64/clang/include/llvm/CodeGen/WasmEHFuncInfo.h
+++ b/linux-x64/clang/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -15,12 +15,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerUnion.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/IR/BasicBlock.h"
namespace llvm {
+class BasicBlock;
+class Function;
+class MachineBasicBlock;
+
+namespace WebAssembly {
enum EventTag { CPP_EXCEPTION = 0, C_LONGJMP = 1 };
+}
using BBOrMBB = PointerUnion<const BasicBlock *, MachineBasicBlock *>;