Update prebuilt Clang to match Android kernel.
Bug: 132428451
Change-Id: I8f6e2cb23f381fc0c02ddea99b867e58e925e5be
diff --git a/linux-x64/clang/include/llvm/MCA/Context.h b/linux-x64/clang/include/llvm/MCA/Context.h
new file mode 100644
index 0000000..a9f3e05
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Context.h
@@ -0,0 +1,68 @@
+//===---------------------------- Context.h ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a class for holding ownership of various simulated
+/// hardware units. A Context also provides a utility routine for constructing
+/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
+/// stages.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_CONTEXT_H
+#define LLVM_MCA_CONTEXT_H
+
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/SourceMgr.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+/// This is a convenience struct to hold the parameters necessary for creating
+/// the pre-built "default" out-of-order pipeline.
+struct PipelineOptions {
+ PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS,
+ bool NoAlias)
+ : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS),
+ StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {}
+ unsigned DispatchWidth; // Initialized from constructor argument DW.
+ unsigned RegisterFileSize; // Initialized from constructor argument RFS.
+ unsigned LoadQueueSize; // Initialized from constructor argument LQS.
+ unsigned StoreQueueSize; // Initialized from constructor argument SQS.
+ bool AssumeNoAlias; // Initialized from constructor argument NoAlias.
+};
+
+class Context {
+ SmallVector<std::unique_ptr<HardwareUnit>, 4> Hardware; // Units owned by this Context.
+ const MCRegisterInfo &MRI; // Reference to constructor argument R; R must outlive this object.
+ const MCSubtargetInfo &STI; // Reference to constructor argument S; S must outlive this object.
+
+public:
+ Context(const MCRegisterInfo &R, const MCSubtargetInfo &S) : MRI(R), STI(S) {}
+ Context(const Context &C) = delete; // Not copy-constructible.
+ Context &operator=(const Context &C) = delete; // Not copy-assignable.
+
+ void addHardwareUnit(std::unique_ptr<HardwareUnit> H) { // Takes ownership of H.
+ Hardware.push_back(std::move(H));
+ }
+
+ /// Construct a basic pipeline for simulating an out-of-order pipeline.
+ /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages.
+ std::unique_ptr<Pipeline> createDefaultPipeline(const PipelineOptions &Opts,
+ InstrBuilder &IB,
+ SourceMgr &SrcMgr);
+};
+
+} // namespace mca
+} // namespace llvm
+#endif // LLVM_MCA_CONTEXT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HWEventListener.h b/linux-x64/clang/include/llvm/MCA/HWEventListener.h
new file mode 100644
index 0000000..1857ad2
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HWEventListener.h
@@ -0,0 +1,155 @@
+//===----------------------- HWEventListener.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the main interface for hardware event listeners.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_HWEVENTLISTENER_H
+#define LLVM_MCA_HWEVENTLISTENER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+// An HWInstructionEvent represents state changes of instructions that
+// listeners might be interested in. Listeners can choose to ignore any event
+// they are not interested in.
+class HWInstructionEvent {
+public:
+ // This is the list of event types that are shared by all targets, that
+ // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent,
+ // ...) and generic Views can manipulate.
+ // Subtargets are free to define additional event types, that are going to be
+ // handled by generic components as opaque values, but can still be
+ // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage,
+ // DispatchStage, ...) and interpreted by subtarget-specific EventListener
+ // implementations.
+ enum GenericEventType {
+ Invalid = 0,
+ // Events generated by the Retire Control Unit.
+ Retired,
+ // Events generated by the Scheduler.
+ Ready,
+ Issued,
+ Executed,
+ // Events generated by the Dispatch logic.
+ Dispatched,
+
+ LastGenericEventType, // Marks the end of the generic event types.
+ };
+
+ HWInstructionEvent(unsigned type, const InstRef &Inst)
+ : Type(type), IR(Inst) {}
+
+ // The event type. The exact meaning depends on the subtarget.
+ const unsigned Type;
+
+ // The instruction this event was generated for (held by reference, not copied).
+ const InstRef &IR;
+};
+
+class HWInstructionIssuedEvent : public HWInstructionEvent { // Event of type Issued.
+public:
+ using ResourceRef = std::pair<uint64_t, uint64_t>;
+ HWInstructionIssuedEvent(const InstRef &IR,
+ ArrayRef<std::pair<ResourceRef, ResourceCycles>> UR)
+ : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {}
+
+ ArrayRef<std::pair<ResourceRef, ResourceCycles>> UsedResources; // Set from constructor argument UR.
+};
+
+class HWInstructionDispatchedEvent : public HWInstructionEvent {
+public:
+ HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef<unsigned> Regs,
+ unsigned UOps)
+ : HWInstructionEvent(HWInstructionEvent::Dispatched, IR),
+ UsedPhysRegs(Regs), MicroOpcodes(UOps) {}
+ // Number of physical registers allocated for this instruction. There is one
+ // entry per register file.
+ ArrayRef<unsigned> UsedPhysRegs;
+ // Number of micro opcodes dispatched.
+ // This field is often set to the total number of micro-opcodes specified by
+ // the instruction descriptor of IR.
+ // The only exception is when IR declares a number of micro opcodes
+ // which exceeds the processor DispatchWidth, and - by construction - it
+ // requires multiple cycles to be fully dispatched. In that particular case,
+ // the dispatch logic would generate more than one dispatch event (one per
+ // cycle), and each event would declare how many micro opcodes have effectively
+ // been dispatched to the schedulers.
+ unsigned MicroOpcodes;
+};
+
+class HWInstructionRetiredEvent : public HWInstructionEvent { // Event of type Retired.
+public:
+ HWInstructionRetiredEvent(const InstRef &IR, ArrayRef<unsigned> Regs)
+ : HWInstructionEvent(HWInstructionEvent::Retired, IR),
+ FreedPhysRegs(Regs) {}
+ // Number of register writes that have been architecturally committed. There
+ // is one entry per register file.
+ ArrayRef<unsigned> FreedPhysRegs;
+};
+
+// An HWStallEvent represents a pipeline stall caused by the lack of hardware
+// resources.
+class HWStallEvent {
+public:
+ enum GenericEventType {
+ Invalid = 0,
+ // Generic stall events generated by the DispatchStage.
+ RegisterFileStall,
+ RetireControlUnitStall,
+ // Generic stall events generated by the Scheduler.
+ DispatchGroupStall,
+ SchedulerQueueFull,
+ LoadQueueFull,
+ StoreQueueFull,
+ LastGenericEvent // Marks the end of the generic stall event types.
+ };
+
+ HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {}
+
+ // The exact meaning of the stall event type depends on the subtarget.
+ const unsigned Type;
+
+ // The instruction this event was generated for (held by reference, not copied).
+ const InstRef &IR;
+};
+
+class HWEventListener {
+public:
+ // Generic events generated by the pipeline. All callbacks default to no-ops.
+ virtual void onCycleBegin() {}
+ virtual void onCycleEnd() {}
+
+ virtual void onEvent(const HWInstructionEvent &Event) {}
+ virtual void onEvent(const HWStallEvent &Event) {}
+
+ using ResourceRef = std::pair<uint64_t, uint64_t>;
+ virtual void onResourceAvailable(const ResourceRef &RRef) {}
+
+ // Events generated by the Scheduler when buffered resources are
+ // consumed/freed for an instruction.
+ virtual void onReservedBuffers(const InstRef &Inst,
+ ArrayRef<unsigned> Buffers) {}
+ virtual void onReleasedBuffers(const InstRef &Inst,
+ ArrayRef<unsigned> Buffers) {}
+
+ virtual ~HWEventListener() {}
+
+private:
+ virtual void anchor(); // Declared only; the definition is not in this header.
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_HWEVENTLISTENER_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/HardwareUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/HardwareUnit.h
new file mode 100644
index 0000000..f6e178b
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/HardwareUnit.h
@@ -0,0 +1,32 @@
+//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a base class for describing a simulated hardware
+/// unit. These units are used to construct a simulated backend.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_HARDWAREUNIT_H
+#define LLVM_MCA_HARDWAREUNIT_H
+
+namespace llvm {
+namespace mca {
+
+class HardwareUnit {
+ HardwareUnit(const HardwareUnit &H) = delete; // Not copy-constructible.
+ HardwareUnit &operator=(const HardwareUnit &H) = delete; // Not copy-assignable.
+
+public:
+ HardwareUnit() = default;
+ virtual ~HardwareUnit(); // Declared here; the definition is not in this header.
+};
+
+} // namespace mca
+} // namespace llvm
+#endif // LLVM_MCA_HARDWAREUNIT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h
new file mode 100644
index 0000000..b8a9f27
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -0,0 +1,206 @@
+//===------------------------- LSUnit.h --------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load/Store unit class that models load/store queues and that implements
+/// a simple weak memory consistency model.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_LSUNIT_H
+#define LLVM_MCA_LSUNIT_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+
+namespace llvm {
+namespace mca {
+
+class InstRef;
+class Scheduler;
+
+/// A Load/Store Unit implementing load and store queues.
+///
+/// This class implements a load queue and a store queue to emulate the
+/// out-of-order execution of memory operations.
+/// Each load (or store) consumes an entry in the load (or store) queue.
+///
+/// Rules are:
+/// 1) A younger load is allowed to pass an older load only if there are no
+/// stores nor barriers in between the two loads.
+/// 2) A younger store is not allowed to pass an older store.
+/// 3) A younger store is not allowed to pass an older load.
+/// 4) A younger load is allowed to pass an older store only if the load does
+/// not alias with the store.
+///
+/// This class optimistically assumes that loads don't alias store operations.
+/// Under this assumption, younger loads are always allowed to pass older
+/// stores (this would only affect rule 4).
+/// Essentially, this class doesn't perform any sort of alias analysis to
+/// identify aliasing loads and stores.
+///
+/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
+/// set to `false` by the constructor of LSUnit.
+///
+/// Note that this class doesn't know about the existence of different memory
+/// types for memory operations (example: write-through, write-combining, etc.).
+/// Derived classes are responsible for implementing that extra knowledge, and
+/// provide different sets of rules for loads and stores by overriding method
+/// `isReady()`.
+/// To emulate a write-combining memory type, rule 2. must be relaxed in a
+/// derived class to enable the reordering of non-aliasing store operations.
+///
+/// No assumptions are made by this class on the size of the store buffer. This
+/// class doesn't know how to identify cases where store-to-load forwarding may
+/// occur.
+///
+/// LSUnit doesn't attempt to predict whether a load or store hits or misses
+/// the L1 cache. To be more specific, LSUnit doesn't know anything about
+/// cache hierarchy and memory types.
+/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
+/// scheduling model provides an "optimistic" load-to-use latency (which usually
+/// matches the load-to-use latency for when there is a hit in the L1D).
+/// Derived classes may expand this knowledge.
+///
+/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
+/// memory-barrier like instructions.
+/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
+/// `unmodeled side effects` behaves like a "soft" load-barrier. That means, it
+/// serializes loads without forcing a flush of the load queue.
+/// Similarly, instructions that both `mayStore` and have `unmodeled side
+/// effects` are treated like store barriers. A full memory
+/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
+/// effects. This is obviously inaccurate, but this is the best that we can do
+/// at the moment.
+///
+/// Each load/store barrier consumes one entry in the load/store queue. A
+/// load/store barrier enforces ordering of loads/stores:
+/// - A younger load cannot pass a load barrier.
+/// - A younger store cannot pass a store barrier.
+///
+/// A younger load has to wait for the memory load barrier to execute.
+/// A load/store barrier is "executed" when it becomes the oldest entry in
+/// the load/store queue(s). That also means, all the older loads/stores have
+/// already been executed.
+class LSUnit : public HardwareUnit {
+ // Load queue size.
+ // LQ_Size == 0 means that there are infinite slots in the load queue.
+ unsigned LQ_Size;
+
+ // Store queue size.
+ // SQ_Size == 0 means that there are infinite slots in the store queue.
+ unsigned SQ_Size;
+
+ // If true, loads never alias with stores. NOTE(review): ctor default for AssumeNoAlias is false.
+ bool NoAlias;
+
+ // When a `MayLoad` instruction is dispatched to the schedulers for execution,
+ // the LSUnit reserves an entry in the `LoadQueue` for it.
+ //
+ // LoadQueue keeps track of all the loads that are in-flight. A load
+ // instruction is eventually removed from the LoadQueue when it reaches
+ // completion stage. That means, a load leaves the queue when it is 'executed',
+ // and its value can be forwarded on the data path to outside units.
+ //
+ // This class doesn't know about the latency of a load instruction. So, it
+ // conservatively/pessimistically assumes that the latency of a load opcode
+ // matches the instruction latency.
+ //
+ // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
+ // and load/store conflicts, the latency of a load is determined by the depth
+ // of the load pipeline. So, we could use field `LoadLatency` in the
+ // MCSchedModel to model that latency.
+ // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
+ // L1D, and it usually already accounts for any extra latency due to data
+ // forwarding.
+ // When doing throughput analysis, `LoadLatency` is likely to
+ // be a better predictor of load latency than instruction latency. This is
+ // particularly true when simulating code with temporal/spatial locality of
+ // memory accesses.
+ // Using `LoadLatency` (instead of the instruction latency) is also expected
+ // to improve the load queue allocation for long latency instructions with
+ // folded memory operands (See PR39829).
+ //
+ // FIXME: On some processors, load/store operations are split into multiple
+ // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
+ // not 256-bit data types. So, a 256-bit load is effectively split into two
+ // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
+ // simplicity, this class optimistically assumes that a load instruction only
+ // consumes one entry in the LoadQueue. Similarly, store instructions only
+ // consume a single entry in the StoreQueue.
+ // In future, we should reassess the quality of this design, and consider
+ // alternative approaches that let instructions specify the number of
+ // load/store queue entries which they consume at dispatch stage (See
+ // PR39830).
+ SmallSet<unsigned, 16> LoadQueue;
+ SmallSet<unsigned, 16> StoreQueue;
+
+ void assignLQSlot(unsigned Index);
+ void assignSQSlot(unsigned Index);
+ bool isReadyNoAlias(unsigned Index) const;
+
+ // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
+ // conservatively treated as a store barrier. It forces older stores to be
+ // executed before newer stores are issued.
+ SmallSet<unsigned, 8> StoreBarriers;
+
+ // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
+ // conservatively treated as a load barrier. It forces older loads to execute
+ // before newer loads are issued.
+ SmallSet<unsigned, 8> LoadBarriers;
+
+ bool isSQEmpty() const { return StoreQueue.empty(); }
+ bool isLQEmpty() const { return LoadQueue.empty(); }
+ bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
+ bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
+
+public:
+ LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
+ bool AssumeNoAlias = false);
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+
+ enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
+
+ // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
+ // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
+ Status isAvailable(const InstRef &IR) const;
+
+ // Allocates load/store queue resources for IR.
+ //
+ // This method assumes that a previous call to `isAvailable(IR)` returned
+ // LSU_AVAILABLE, and that IR is a memory operation.
+ void dispatch(const InstRef &IR);
+
+ // By default, rules are:
+ // 1. A store may not pass a previous store.
+ // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
+ // 3. A load may pass a previous load.
+ // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
+ // 5. A load has to wait until an older load barrier is fully executed.
+ // 6. A store has to wait until an older store barrier is fully executed.
+ virtual bool isReady(const InstRef &IR) const;
+
+ // Load and store instructions are tracked by their corresponding queues from
+ // dispatch until the "instruction executed" event.
+ // Only when a load instruction reaches the 'Executed' stage, its value
+ // becomes available to the users. At that point, the load no longer needs to
+ // be tracked by the load queue.
+ // FIXME: For simplicity, we optimistically assume a similar behavior for
+ // store instructions. In practice, store operations don't tend to leave the
+ // store queue until they reach the 'Retired' stage (See PR39830).
+ void onInstructionExecuted(const InstRef &IR);
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_LSUNIT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h
new file mode 100644
index 0000000..3650632
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -0,0 +1,239 @@
+//===--------------------- RegisterFile.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a register mapping file class. This class is responsible
+/// for managing hardware register files and the tracking of data dependencies
+/// between registers.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_REGISTER_FILE_H
+#define LLVM_MCA_REGISTER_FILE_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+class ReadState;
+class WriteState;
+class WriteRef;
+
+/// Manages hardware register files, and tracks register definitions for
+/// register renaming purposes.
+class RegisterFile : public HardwareUnit {
+ const MCRegisterInfo &MRI;
+
+ // Struct RegisterMappingTracker is a physical register file (PRF) descriptor.
+ // There is one RegisterMappingTracker for every PRF definition in the
+ // scheduling model.
+ //
+ // An instance of RegisterMappingTracker tracks the number of physical
+ // registers available for renaming. It also tracks the number of register
+ // moves eliminated per cycle.
+ struct RegisterMappingTracker {
+ // The total number of physical registers that are available in this
+ // register file for register renaming purposes. A value of zero for this
+ // field means: this register file has an unbounded number of physical
+ // registers.
+ const unsigned NumPhysRegs;
+ // Number of physical registers that are currently in use.
+ unsigned NumUsedPhysRegs;
+
+ // Maximum number of register moves that can be eliminated by this PRF every
+ // cycle. A value of zero means that there is no limit in the number of
+ // moves which can be eliminated every cycle.
+ const unsigned MaxMoveEliminatedPerCycle;
+
+ // Number of register moves eliminated during this cycle.
+ //
+ // This value is increased by one every time a register move is eliminated.
+ // Every new cycle, this value is reset to zero.
+ // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if
+ // NumMoveEliminated is less than MaxMoveEliminatedPerCycle.
+ unsigned NumMoveEliminated;
+
+ // If set, move elimination is restricted to zero-register moves only.
+ bool AllowZeroMoveEliminationOnly;
+
+ RegisterMappingTracker(unsigned NumPhysRegisters,
+ unsigned MaxMoveEliminated = 0U,
+ bool AllowZeroMoveElimOnly = false)
+ : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0),
+ MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U),
+ AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly) {}
+ };
+
+ // A vector of register file descriptors. This set always contains at least
+ // one entry. Entry at index #0 is reserved. That entry describes a register
+ // file with an unbounded number of physical registers that "sees" all the
+ // hardware registers declared by the target (i.e. all the register
+ // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is
+ // the target name).
+ //
+ // Users can limit the number of physical registers that are available in
+ // register file #0 specifying command line flag `-register-file-size=<uint>`.
+ SmallVector<RegisterMappingTracker, 4> RegisterFiles;
+
+ // This type is used to propagate information about the owner of a register,
+ // and the cost of allocating it in the PRF. Register cost is defined as the
+ // number of physical registers consumed by the PRF to allocate a user
+ // register.
+ //
+ // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical
+ // registers. So, the cost of allocating a YMM register in BtVer2 is 2.
+ using IndexPlusCostPairTy = std::pair<unsigned, unsigned>;
+
+ // Struct RegisterRenamingInfo is used to map logical registers to register
+ // files.
+ //
+ // There is a RegisterRenamingInfo object for every logical register defined
+ // by the target. RegisterRenamingInfo objects are stored into vector
+ // `RegisterMappings`, and MCPhysReg IDs can be used to reference
+ // elements in that vector.
+ //
+ // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost`
+ // specifies both the owning PRF, as well as the number of physical registers
+ // consumed at register renaming stage.
+ //
+ // Field `AllowMoveElimination` is set for registers that are used as
+ // destination by optimizable register moves.
+ //
+ // Field `AliasRegID` is set by writes from register moves that have been
+ // eliminated at register renaming stage. A move eliminated at register
+ // renaming stage is effectively bypassed, and its write aliases the source
+ // register definition.
+ struct RegisterRenamingInfo {
+ IndexPlusCostPairTy IndexPlusCost;
+ MCPhysReg RenameAs;
+ MCPhysReg AliasRegID;
+ bool AllowMoveElimination;
+ RegisterRenamingInfo()
+ : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U), AliasRegID(0U),
+ AllowMoveElimination(false) {}
+ };
+
+ // RegisterMapping objects are mainly used to track physical register
+ // definitions and resolve data dependencies.
+ //
+ // Every register declared by the Target is associated with an instance of
+ // RegisterMapping. RegisterMapping objects keep track of writes to a logical
+ // register. That information is used by class RegisterFile to resolve data
+ // dependencies, and correctly set latencies for register uses.
+ //
+ // This implementation does not allow overlapping register files. The only
+ // register file that is allowed to overlap with other register files is
+ // register file #0. If we exclude register file #0, every register is "owned"
+ // by at most one register file.
+ using RegisterMapping = std::pair<WriteRef, RegisterRenamingInfo>;
+
+ // There is one entry per each register defined by the target.
+ std::vector<RegisterMapping> RegisterMappings;
+
+ // Used to track zero registers. There is one bit for each register defined by
+ // the target. Bits are set for registers that are known to be zero.
+ APInt ZeroRegisters;
+
+ // This method creates a new register file descriptor.
+ // The new register file owns all of the registers declared by register
+ // classes in the 'RegisterClasses' set.
+ //
+ // Processor models allow the definition of RegisterFile(s) via tablegen. For
+ // example, this is a tablegen definition for an x86 register file for
+ // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1
+ // physical register).
+ //
+ // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]>
+ //
+ // Here FPRegisterFile contains all the registers defined by register class
+ // VR128RegClass and VR256RegClass. FPRegisterFile implements 60
+ // registers which can be used for register renaming purpose.
+ void addRegisterFile(const MCRegisterFileDesc &RF,
+ ArrayRef<MCRegisterCostEntry> Entries);
+
+ // Consumes physical registers in each register file specified by the
+ // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`.
+ void allocatePhysRegs(const RegisterRenamingInfo &Entry,
+ MutableArrayRef<unsigned> UsedPhysRegs);
+
+ // Releases previously allocated physical registers from the register file(s).
+ // This method is called from `invalidateRegisterMapping()`.
+ void freePhysRegs(const RegisterRenamingInfo &Entry,
+ MutableArrayRef<unsigned> FreedPhysRegs);
+
+ // Collects writes that are in a RAW dependency with RS.
+ // This method is called from `addRegisterRead()`.
+ void collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const;
+
+ // Create an instance of RegisterMappingTracker for every register file
+ // specified by the processor model.
+ // If no register file is specified, then this method creates a default
+ // register file with an unbounded number of physical registers.
+ void initialize(const MCSchedModel &SM, unsigned NumRegs);
+
+public:
+ RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
+ unsigned NumRegs = 0);
+
+ // This method updates the register mappings inserting a new register
+ // definition. This method is also responsible for updating the number of
+ // allocated physical registers in each register file modified by the write.
+ // No physical register is allocated if this write is from a zero-idiom.
+ void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs);
+
+ // Collect writes that are in a data dependency with RS, and update RS
+ // internal state.
+ void addRegisterRead(ReadState &RS, const MCSubtargetInfo &STI) const;
+
+ // Removes write \param WS from the register mappings.
+ // Physical registers may be released to reflect this update.
+ // No registers are released if this write is from a zero-idiom.
+ void removeRegisterWrite(const WriteState &WS,
+ MutableArrayRef<unsigned> FreedPhysRegs);
+
+ // Returns true if a move from RS to WS can be eliminated.
+ // On success, it updates WriteState by setting flag `WS.isEliminated`.
+ // If RS is a read from a zero register, and WS is eliminated, then
+ // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
+ // reserve a physical register for it.
+ bool tryEliminateMove(WriteState &WS, ReadState &RS);
+
+ // Checks if there are enough physical registers in the register files.
+ // Returns a "response mask" where each bit represents the response from a
+ // different register file. A mask of all zeroes means that all register
+ // files are available. Otherwise, the mask can be used to identify which
+ // register file was busy. This semantic allows us to classify dispatch
+ // stalls caused by the lack of register file resources.
+ //
+ // Current implementation can simulate up to 32 register files (including the
+ // special register file at index #0).
+ unsigned isAvailable(ArrayRef<unsigned> Regs) const;
+
+ // Returns the number of PRFs implemented by this processor.
+ unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
+
+ // Notify each PRF that a new cycle just started.
+ void cycleStart();
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_REGISTER_FILE_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h
new file mode 100644
index 0000000..3addaaf
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -0,0 +1,425 @@
+//===--------------------- ResourceManager.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy. These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RESOURCE_MANAGER_H
+#define LLVM_MCA_RESOURCE_MANAGER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+/// Used to notify the internal state of a processor resource.
+///
+/// A processor resource is available if it is not reserved, and there are
+/// available slots in the buffer. A processor resource is unavailable if it
+/// is either reserved, or the associated buffer is full. A processor resource
+/// with a buffer size of -1 is always available if it is not reserved.
+///
+/// Values of type ResourceStateEvent are returned by method
+/// ResourceState::isBufferAvailable(), which is used to query the internal
+/// state of a resource.
+///
+/// The naming convention for resource state events is:
+/// * Event names start with prefix RS_
+/// * Prefix RS_ is followed by a string describing the actual resource state.
+enum ResourceStateEvent {
+ RS_BUFFER_AVAILABLE, // Not a buffered resource, or a buffer slot is free.
+ RS_BUFFER_UNAVAILABLE, // There are no available slots in the buffer.
+ RS_RESERVED // The resource is a dispatch hazard, and it is reserved.
+};
+
+/// Resource allocation strategy used by hardware scheduler resources.
+class ResourceStrategy {
+ ResourceStrategy(const ResourceStrategy &) = delete;
+ ResourceStrategy &operator=(const ResourceStrategy &) = delete;
+
+public:
+ ResourceStrategy() {}
+ virtual ~ResourceStrategy();
+
+ /// Selects a processor resource unit from a ReadyMask.
+ virtual uint64_t select(uint64_t ReadyMask) = 0;
+
+ /// Called by the ResourceManager when a processor resource group, or a
+ /// processor resource with multiple units, has become unavailable.
+ ///
+ /// The default strategy uses this information to bias its selection logic.
+ virtual void used(uint64_t ResourceMask) {}
+};
+
+/// Default resource allocation strategy used by processor resource groups and
+/// processor resources with multiple units.
+class DefaultResourceStrategy final : public ResourceStrategy {
+ /// A Mask of resource unit identifiers.
+ ///
+ /// There is one bit set for every available resource unit.
+ /// It defaults to the value of field ResourceSizeMask in ResourceState.
+ const uint64_t ResourceUnitMask;
+
+ /// A simple round-robin selector for processor resource units.
+ /// Each bit of this mask identifies a sub resource within a group.
+ ///
+ /// As an example, let's assume that this is a default policy for a
+ /// processor resource group composed by the following three units:
+ /// ResourceA -- 0b001
+ /// ResourceB -- 0b010
+ /// ResourceC -- 0b100
+ ///
+ /// Field NextInSequenceMask is used to select the next unit from the set of
+ /// resource units. It defaults to the value of field `ResourceUnitMask` (in
+ /// this example, it defaults to mask '0b111').
+ ///
+ /// The round-robin selector would first select 'ResourceC', then
+ /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the
+ /// corresponding bit in NextInSequenceMask is cleared. For example, if
+ /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
+ /// 0b011.
+ ///
+ /// When NextInSequenceMask becomes zero, it is automatically reset to the
+ /// default value (i.e. ResourceUnitMask).
+ uint64_t NextInSequenceMask;
+
+ /// This field is used to track resource units that are used (i.e. selected)
+ /// by groups other than the one associated with this strategy object.
+ ///
+ /// In LLVM processor resource groups are allowed to partially (or fully)
+ /// overlap. That means, the same unit may be visible to multiple groups.
+ /// This field keeps track of uses that have originated from outside of
+ /// this group. The idea is to bias the selection strategy, so that resources
+ /// that haven't been used by other groups get prioritized.
+ ///
+ /// The end goal is to (try to) keep the resource distribution as uniform
+ /// as possible. By construction, this mask only tracks one-level of resource
+ /// usage. Therefore, this strategy is expected to be less accurate when the
+ /// same units are used multiple times by other groups within a single round of
+ /// select.
+ ///
+ /// Note: an LRU selector would have a better accuracy at the cost of being
+ /// slightly more expensive (mostly in terms of runtime cost). Methods
+ /// 'select' and 'used', are always in the hot execution path of llvm-mca.
+ /// Therefore, a slow implementation of 'select' would have a negative impact
+ /// on the overall performance of the tool.
+ uint64_t RemovedFromNextInSequence;
+
+public:
+ DefaultResourceStrategy(uint64_t UnitMask)
+ : ResourceStrategy(), ResourceUnitMask(UnitMask),
+ NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {}
+ virtual ~DefaultResourceStrategy() = default;
+
+ uint64_t select(uint64_t ReadyMask) override;
+ void used(uint64_t Mask) override;
+};
+
+/// A processor resource descriptor.
+///
+/// There is an instance of this class for every processor resource defined by
+/// the machine scheduling model.
+/// Objects of class ResourceState dynamically track the usage of processor
+/// resource units.
+class ResourceState {
+ /// An index to the MCProcResourceDesc entry in the processor model.
+ const unsigned ProcResourceDescIndex;
+ /// A resource mask. This is generated by the tool with the help of
+ /// function `mca::computeProcResourceMasks' (see Support.h).
+ ///
+ /// Field ResourceMask only has one bit set if this resource state describes a
+ /// processor resource unit (i.e. this is not a group). That means, we can
+ /// quickly check if a resource is a group by simply counting the number of
+ /// bits that are set in the mask.
+ ///
+ /// The most significant bit of a mask (MSB) uniquely identifies a resource.
+ /// Remaining bits are used to describe the composition of a group (Group).
+ ///
+ /// Example (little endian):
+ /// Resource | Mask | MSB | Group
+ /// ---------+------------+------------+------------
+ /// A | 0b000001 | 0b000001 | 0b000000
+ /// | | |
+ /// B | 0b000010 | 0b000010 | 0b000000
+ /// | | |
+ /// C | 0b010000 | 0b010000 | 0b000000
+ /// | | |
+ /// D | 0b110010 | 0b100000 | 0b010010
+ ///
+ /// In this example, resources A, B and C are processor resource units.
+ /// Only resource D is a group resource, and it contains resources B and C.
+ /// That is because MSB(B) and MSB(C) are both contained within Group(D).
+ const uint64_t ResourceMask;
+
+ /// A ProcResource can have multiple units.
+ ///
+ /// For processor resource groups this field is a mask of contained resource
+ /// units. It is obtained from ResourceMask by clearing the highest set bit.
+ /// The number of resource units in a group can be simply computed as the
+ /// population count of this field.
+ ///
+ /// For normal (i.e. non-group) resources, the number of bits set in this mask
+ /// is equivalent to the number of units declared by the processor model (see
+ /// field 'NumUnits' in 'ProcResourceUnits').
+ uint64_t ResourceSizeMask;
+
+ /// A mask of ready units.
+ uint64_t ReadyMask;
+
+ /// Buffered resources will have this field set to a positive number different
+ /// than zero. A buffered resource behaves like a reservation station
+ /// implementing its own buffer for out-of-order execution.
+ ///
+ /// A BufferSize of 1 is used by scheduler resources that force in-order
+ /// execution.
+ ///
+ /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
+ /// Since in-order issue/dispatch resources don't implement buffers, dispatch
+ /// events coincide with issue events.
+ /// Also, no other instruction can be dispatched/issued while this resource is
+ /// in use. Only when all the "resource cycles" are consumed (after the issue
+ /// event), a new instruction can be dispatched.
+ const int BufferSize;
+
+ /// Available slots in the buffer (zero, if this is not a buffered resource).
+ unsigned AvailableSlots;
+
+ /// This field is set if this resource is currently reserved.
+ ///
+ /// Resources can be reserved for a number of cycles.
+ /// Instructions can still be dispatched to reserved resources. However,
+ /// instructions dispatched to a reserved resource cannot be issued to the
+ /// underlying units (i.e. pipelines) until the resource is released.
+ bool Unavailable;
+
+ const bool IsAGroup;
+
+ /// Checks for the availability of unit 'SubResMask' in the group.
+ bool isSubResourceReady(uint64_t SubResMask) const {
+ return ReadyMask & SubResMask;
+ }
+
+public:
+ ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask);
+
+ unsigned getProcResourceID() const { return ProcResourceDescIndex; }
+ uint64_t getResourceMask() const { return ResourceMask; }
+ uint64_t getReadyMask() const { return ReadyMask; }
+ int getBufferSize() const { return BufferSize; }
+
+ bool isBuffered() const { return BufferSize > 0; }
+ bool isInOrder() const { return BufferSize == 1; }
+
+ /// Returns true if this is an in-order dispatch/issue resource.
+ bool isADispatchHazard() const { return BufferSize == 0; }
+ bool isReserved() const { return Unavailable; }
+
+ void setReserved() { Unavailable = true; }
+ void clearReserved() { Unavailable = false; }
+
+ /// Returns true if this resource is not reserved, and if there are at least
+ /// `NumUnits` available units.
+ bool isReady(unsigned NumUnits = 1) const;
+
+ bool isAResourceGroup() const { return IsAGroup; }
+
+ bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
+
+ void markSubResourceAsUsed(uint64_t ID) {
+ assert(isSubResourceReady(ID));
+ ReadyMask ^= ID;
+ }
+
+ void releaseSubResource(uint64_t ID) {
+ assert(!isSubResourceReady(ID));
+ ReadyMask ^= ID;
+ }
+
+ unsigned getNumUnits() const {
+ return isAResourceGroup() ? 1U : countPopulation(ResourceSizeMask);
+ }
+
+ /// Checks if there is an available slot in the resource buffer.
+ ///
+ /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if
+ /// there is a slot available.
+ ///
+ /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it
+ /// is reserved.
+ ///
+ /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
+ ResourceStateEvent isBufferAvailable() const;
+
+ /// Reserve a slot in the buffer.
+ void reserveBuffer() {
+ if (AvailableSlots)
+ AvailableSlots--;
+ }
+
+ /// Release a slot in the buffer.
+ void releaseBuffer() {
+ if (BufferSize > 0)
+ AvailableSlots++;
+ assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
+ }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+/// A resource unit identifier.
+///
+/// This is used to identify a specific processor resource unit using a pair
+/// of indices where the 'first' index is a processor resource mask, and the
+/// 'second' index is an index for a "sub-resource" (i.e. unit).
+typedef std::pair<uint64_t, uint64_t> ResourceRef;
+
+/// First: an MCProcResourceDesc index identifying a buffered resource.
+/// Second: max number of buffer entries used in this resource.
+typedef std::pair<unsigned, unsigned> BufferUsageEntry;
+
+/// A resource manager for processor resource units and groups.
+///
+/// This class owns all the ResourceState objects, and it is responsible for
+/// acting on requests from a Scheduler by updating the internal state of
+/// ResourceState objects.
+/// This class doesn't know about instruction itineraries and functional units.
+/// In the future, it can be extended to support itineraries too through the
+/// same public interface.
+class ResourceManager {
+ // Set of resources available on the subtarget.
+ //
+ // There is an instance of ResourceState for every resource declared by the
+ // target scheduling model.
+ //
+ // Elements of this vector are ordered by resource kind. In particular,
+ // resource units take precedence over resource groups.
+ //
+ // The index of a processor resource in this vector depends on the value of
+ // its mask (see the description of field ResourceState::ResourceMask). In
+ // particular, it is computed as the position of the most significant bit set
+ // (MSB) in the mask plus one (since we want to ignore the invalid resource
+ // descriptor at index zero).
+ //
+ // Example (little endian):
+ //
+ // Resource | Mask | MSB | Index
+ // ---------+---------+---------+-------
+ // A | 0b00001 | 0b00001 | 1
+ // | | |
+ // B | 0b00100 | 0b00100 | 3
+ // | | |
+ // C | 0b10010 | 0b10000 | 5
+ //
+ //
+ // The same index is also used to address elements within vector `Strategies`
+ // and vector `Resource2Groups`.
+ std::vector<std::unique_ptr<ResourceState>> Resources;
+ std::vector<std::unique_ptr<ResourceStrategy>> Strategies;
+
+ // Used to quickly identify groups that own a particular resource unit.
+ std::vector<uint64_t> Resource2Groups;
+
+ // A table to map processor resource IDs to processor resource masks.
+ SmallVector<uint64_t, 8> ProcResID2Mask;
+
+ // Keeps track of which resources are busy, and how many cycles are left
+ // before those become usable again.
+ SmallDenseMap<ResourceRef, unsigned> BusyResources;
+
+ // Set of processor resource units available on the target.
+ uint64_t ProcResUnitMask;
+
+ // Set of processor resource units that are available during this cycle.
+ uint64_t AvailableProcResUnits;
+
+ // Set of processor resource groups that are currently reserved.
+ uint64_t ReservedResourceGroups;
+
+ // Returns the actual resource unit that will be used.
+ ResourceRef selectPipe(uint64_t ResourceID);
+
+ void use(const ResourceRef &RR);
+ void release(const ResourceRef &RR);
+
+ unsigned getNumUnits(uint64_t ResourceID) const;
+
+ // Overrides the selection strategy for the processor resource with the given
+ // mask.
+ void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+ uint64_t ResourceMask);
+
+public:
+ ResourceManager(const MCSchedModel &SM);
+ virtual ~ResourceManager() = default;
+
+ // Overrides the selection strategy for the resource at index ResourceID in
+ // the MCProcResourceDesc table.
+ void setCustomStrategy(std::unique_ptr<ResourceStrategy> S,
+ unsigned ResourceID) {
+ assert(ResourceID < ProcResID2Mask.size() &&
+ "Invalid resource index in input!");
+ return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]);
+ }
+
+ // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
+ // there are enough available slots in the buffers.
+ ResourceStateEvent canBeDispatched(ArrayRef<uint64_t> Buffers) const;
+
+ // Return the processor resource identifier associated with this Mask.
+ unsigned resolveResourceMask(uint64_t Mask) const;
+
+ // Consume a slot in every buffered resource from array 'Buffers'. Resource
+ // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
+ void reserveBuffers(ArrayRef<uint64_t> Buffers);
+
+ // Release buffer entries previously allocated by method reserveBuffers.
+ void releaseBuffers(ArrayRef<uint64_t> Buffers);
+
+ // Reserve a processor resource. A reserved resource is not available for
+ // instruction issue until it is released.
+ void reserveResource(uint64_t ResourceID);
+
+ // Release a previously reserved processor resource.
+ void releaseResource(uint64_t ResourceID);
+
+ // Returns a zero mask if resources requested by Desc are all available during
+ // this cycle. It returns a non-zero mask value only if there are unavailable
+ // processor resources; each bit set in the mask represents a busy processor
+ // resource unit or a reserved processor resource group.
+ uint64_t checkAvailability(const InstrDesc &Desc) const;
+
+ uint64_t getProcResUnitMask() const { return ProcResUnitMask; }
+ uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; }
+
+ void issueInstruction(
+ const InstrDesc &Desc,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+ void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed);
+
+#ifndef NDEBUG
+ void dump() const {
+ for (const std::unique_ptr<ResourceState> &Resource : Resources)
+ Resource->dump();
+ }
+#endif
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RESOURCE_MANAGER_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
new file mode 100644
index 0000000..0629014
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -0,0 +1,103 @@
+//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file simulates the hardware responsible for retiring instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#define LLVM_MCA_RETIRE_CONTROL_UNIT_H
+
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include <vector>
+
+namespace llvm {
+namespace mca {
+
+/// This class tracks which instructions are in-flight (i.e., dispatched but not
+/// retired) in the OoO backend.
+///
+/// This class checks on every cycle if/which instructions can be retired.
+/// Instructions are retired in program order.
+/// In the event of an instruction being retired, the pipeline that owns
+/// this RetireControlUnit (RCU) gets notified.
+///
+/// On instruction retired, register updates are all architecturally
+/// committed, and any physical registers previously allocated for the
+/// retired instruction are freed.
+struct RetireControlUnit : public HardwareUnit {
+ // A RUToken is created by the RCU for every instruction dispatched to the
+ // schedulers. These "tokens" are managed by the RCU in its token Queue.
+ //
+ // On every cycle ('cycleEvent'), the RCU iterates through the token queue
+ // looking for any token with its 'Executed' flag set. If a token has that
+ // flag set, then the instruction has reached the write-back stage and will
+ // be retired by the RCU.
+ //
+ // 'NumSlots' represents the number of entries consumed by the instruction in
+ // the reorder buffer. Those entries will become available again once the
+ // instruction is retired.
+ //
+ // Note that the size of the reorder buffer is defined by the scheduling
+ // model via field 'NumMicroOpBufferSize'.
+ struct RUToken {
+ InstRef IR;
+ unsigned NumSlots; // Slots reserved for this instruction.
+ bool Executed; // True if the instruction is past the WB stage.
+ };
+
+private:
+ unsigned NextAvailableSlotIdx;
+ unsigned CurrentInstructionSlotIdx;
+ unsigned AvailableSlots;
+ unsigned MaxRetirePerCycle; // 0 means no limit.
+ std::vector<RUToken> Queue;
+
+public:
+ RetireControlUnit(const MCSchedModel &SM);
+
+ bool isEmpty() const { return AvailableSlots == Queue.size(); }
+ bool isAvailable(unsigned Quantity = 1) const {
+ // Some instructions may declare a number of uOps which exceeds the size
+ // of the reorder buffer. To avoid problems, cap the number of slots to
+ // the size of the reorder buffer.
+ Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+
+ // Further normalize the number of micro opcodes for instructions that
+ // declare zero opcodes. This should match the behavior of method
+ // reserveSlot().
+ Quantity = std::max(Quantity, 1U);
+ return AvailableSlots >= Quantity;
+ }
+
+ unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; }
+
+ // Reserves a number of slots, and returns a new token.
+ unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps);
+
+ // Return the current token from the RCU's circular token queue.
+ const RUToken &peekCurrentToken() const;
+
+ // Advance the pointer to the next token in the circular token queue.
+ void consumeCurrentToken();
+
+ // Update the RCU token to represent the executed state.
+ void onInstructionExecuted(unsigned TokenID);
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RETIRE_CONTROL_UNIT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h
new file mode 100644
index 0000000..f1cfcbe
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -0,0 +1,250 @@
+//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A scheduler for Processor Resource Units and Processor Resource Groups.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SCHEDULER_H
+#define LLVM_MCA_SCHEDULER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+class SchedulerStrategy { // Interface for instruction selection strategies.
+public:
+ SchedulerStrategy() = default;
+ virtual ~SchedulerStrategy();
+
+ /// Returns true if Lhs should take priority over Rhs.
+ ///
+ /// This method is used by class Scheduler to select the "best" ready
+ /// instruction to issue to the underlying pipelines.
+ virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0;
+};
+
+/// Default instruction selection strategy used by class Scheduler.
+class DefaultSchedulerStrategy : public SchedulerStrategy {
+ /// This method ranks instructions based on their age, and the number of known
+ /// users. The lower the rank value, the better.
+ int computeRank(const InstRef &Lhs) const {
+ return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers();
+ }
+
+public:
+ DefaultSchedulerStrategy() = default;
+ virtual ~DefaultSchedulerStrategy();
+
+ bool compare(const InstRef &Lhs, const InstRef &Rhs) const override {
+ int LhsRank = computeRank(Lhs);
+ int RhsRank = computeRank(Rhs);
+
+ // On equal rank, prioritize older instructions over younger instructions
+ // to minimize the pressure on the reorder buffer.
+ if (LhsRank == RhsRank)
+ return Lhs.getSourceIndex() < Rhs.getSourceIndex();
+ return LhsRank < RhsRank;
+ }
+};
+
+/// Class Scheduler is responsible for issuing instructions to pipeline
+/// resources.
+///
+/// Internally, it delegates to a ResourceManager the management of processor
+/// resources. This class is also responsible for tracking the progress of
+/// instructions from the dispatch stage, until the write-back stage.
+///
+class Scheduler : public HardwareUnit {
+ LSUnit &LSU;
+
+ // Instruction selection strategy for this Scheduler.
+ std::unique_ptr<SchedulerStrategy> Strategy;
+
+ // Hardware resources that are managed by this scheduler.
+ std::unique_ptr<ResourceManager> Resources;
+
+ // Instructions dispatched to the Scheduler are internally classified based on
+ // the instruction stage (see Instruction::InstrStage).
+ //
+ // An Instruction dispatched to the Scheduler is added to the WaitSet if not
+ // all its register operands are available, and at least one latency is unknown.
+ // By construction, the WaitSet only contains instructions that are in the
+ // IS_DISPATCHED stage.
+ //
+ // An Instruction transitions from the WaitSet to the PendingSet if the
+ // instruction is not ready yet, but the latency of every register read is known.
+ // Instructions in the PendingSet are expected to be in the IS_PENDING stage.
+ //
+ // Instructions in the PendingSet are immediately dominated only by
+ // instructions that have already been issued to the underlying pipelines.
+ // In the presence of bottlenecks caused by data dependencies, the PendingSet
+ // can be inspected to identify problematic data dependencies between
+ // instructions.
+ //
+ // An instruction is moved to the ReadySet when all register operands become
+ // available, and all memory dependencies are met. Instructions that are
+ // moved from the PendingSet to the ReadySet transition in state from
+ // 'IS_PENDING' to 'IS_READY'.
+ //
+ // On every cycle, the Scheduler checks if it can promote instructions from the
+ // PendingSet to the ReadySet.
+ //
+ // An Instruction is moved from the ReadySet to the `IssuedSet` when it starts
+ // execution. This event also causes an instruction state transition (i.e. from
+ // state IS_READY, to state IS_EXECUTING). An Instruction leaves the IssuedSet
+ // only when it reaches the write-back stage.
+ std::vector<InstRef> WaitSet;
+ std::vector<InstRef> PendingSet;
+ std::vector<InstRef> ReadySet;
+ std::vector<InstRef> IssuedSet;
+
+ // A mask of busy resource units. It defaults to the empty set (i.e. a zero
+ // mask), and it is cleared at the beginning of every cycle.
+ // It is updated every time the scheduler fails to issue an instruction from
+ // the ready set due to unavailable pipeline resources.
+ // Each bit of the mask represents an unavailable resource.
+ uint64_t BusyResourceUnits;
+
+ /// Verify the given selection strategy and set the Strategy member
+ /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is
+ /// used.
+ void initializeStrategy(std::unique_ptr<SchedulerStrategy> S);
+
+ /// Issue an instruction without updating the ready queue.
+ void issueInstructionImpl(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+ // Identify instructions that have finished executing, and remove them from
+ // the IssuedSet. References to executed instructions are added to input
+ // vector 'Executed'.
+ void updateIssuedSet(SmallVectorImpl<InstRef> &Executed);
+
+ // Try to promote instructions from the PendingSet to the ReadySet.
+ // Add promoted instructions to the 'Ready' vector in input.
+ // Returns true if at least one instruction was promoted.
+ bool promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+
+ // Try to promote instructions from the WaitSet to the PendingSet.
+ // Returns true if at least one instruction was promoted.
+ bool promoteToPendingSet();
+
+public:
+ Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
+ : Scheduler(Model, Lsu, nullptr) {}
+
+ Scheduler(const MCSchedModel &Model, LSUnit &Lsu,
+ std::unique_ptr<SchedulerStrategy> SelectStrategy)
+ : Scheduler(make_unique<ResourceManager>(Model), Lsu,
+ std::move(SelectStrategy)) {}
+
+ Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
+ std::unique_ptr<SchedulerStrategy> SelectStrategy)
+ : LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0) {
+ initializeStrategy(std::move(SelectStrategy));
+ }
+
+ // Stalls generated by the scheduler.
+ enum Status {
+ SC_AVAILABLE,
+ SC_LOAD_QUEUE_FULL,
+ SC_STORE_QUEUE_FULL,
+ SC_BUFFERS_FULL,
+ SC_DISPATCH_GROUP_STALL,
+ };
+
+ /// Check if the instruction in 'IR' can be dispatched and returns an answer
+ /// in the form of a Status value.
+ ///
+ /// The DispatchStage is responsible for querying the Scheduler before
+ /// dispatching new instructions. This routine is used for performing such
+ /// a query. If the instruction 'IR' can be dispatched, then SC_AVAILABLE is
+ /// returned, otherwise the returned value identifies the stall type.
+ /// Internally, it also checks if the load/store unit is available.
+ Status isAvailable(const InstRef &IR) const;
+
+ /// Reserves buffer and LSUnit queue resources that are necessary to issue
+ /// this instruction.
+ ///
+ /// This method returns nothing; use `isReady(IR)` to check if IR can be issued
+ /// to the underlying pipelines. Note that this operation cannot fail; it
+ /// assumes that a previous call to `isAvailable(IR)` returned `SC_AVAILABLE`.
+ void dispatch(const InstRef &IR);
+
+ /// Returns true if IR is ready to be executed by the underlying pipelines.
+ /// This method assumes that IR has been previously dispatched.
+ bool isReady(const InstRef &IR) const;
+
+ /// Issue an instruction and populates a vector of used pipeline resources,
+ /// and a vector of instructions that transitioned to the ready state as a
+ /// result of this event.
+ void issueInstruction(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
+ SmallVectorImpl<InstRef> &Ready);
+
+ /// Returns true if IR has to be issued immediately, or if IR is a zero
+ /// latency instruction.
+ bool mustIssueImmediately(const InstRef &IR) const;
+
+ /// This routine notifies the Scheduler that a new cycle just started.
+ ///
+ /// It notifies the underlying ResourceManager that a new cycle just started.
+ /// Vector `Freed` is populated with resourceRef related to resources that
+ /// have changed in state, and that are now available to new instructions.
+ /// Instructions executed are added to vector Executed, while vector Ready is
+ /// populated with instructions that have become ready in this new cycle.
+ void cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+ SmallVectorImpl<InstRef> &Ready,
+ SmallVectorImpl<InstRef> &Executed);
+
+ /// Convert a resource mask into a valid llvm processor resource identifier.
+ unsigned getResourceID(uint64_t Mask) const {
+ return Resources->resolveResourceMask(Mask);
+ }
+
+ /// Select the next instruction to issue from the ReadySet. Returns an invalid
+ /// instruction reference if there are no ready instructions, or if processor
+ /// resources are not available.
+ InstRef select();
+
+ /// Returns a mask of busy resources. Each bit of the mask identifies a unique
+ /// processor resource unit. In the absence of bottlenecks caused by resource
+ /// pressure, the mask value returned by this method is always zero.
+ uint64_t getBusyResourceUnits() const { return BusyResourceUnits; }
+ bool arePipelinesFullyUsed() const {
+ return !Resources->getAvailableProcResUnits();
+ }
+ bool isReadySetEmpty() const { return ReadySet.empty(); }
+ bool isWaitSetEmpty() const { return WaitSet.empty(); }
+
+#ifndef NDEBUG
+ // Debug-only helper (compiled out when NDEBUG is defined).
+ void dump() const;
+
+ // Sanity check (the PendingSet is not checked). It should only be called
+ // when we know that 'IR' is not in the scheduler's instruction queues.
+ void sanityCheck(const InstRef &IR) const {
+ assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!");
+ assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!");
+ assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!");
+ }
+#endif // !NDEBUG
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SCHEDULER_H
diff --git a/linux-x64/clang/include/llvm/MCA/InstrBuilder.h b/linux-x64/clang/include/llvm/MCA/InstrBuilder.h
new file mode 100644
index 0000000..6900163
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/InstrBuilder.h
@@ -0,0 +1,76 @@
+//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A builder class for instructions that are statically analyzed by llvm-mca.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRBUILDER_H
+#define LLVM_MCA_INSTRBUILDER_H
+
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+/// A builder class that knows how to construct Instruction objects.
+///
+/// Every llvm-mca Instruction is described by an object of class InstrDesc.
+/// An InstrDesc describes which registers are read/written by the instruction,
+/// as well as the instruction latency and hardware resources consumed.
+///
+/// This class is used by the tool to construct Instructions and instruction
+/// descriptors (i.e. InstrDesc objects).
+/// Information from the machine scheduling model is used to identify processor
+/// resources that are consumed by an instruction.
+class InstrBuilder {
+ const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCInstrAnalysis *MCIA;
+ SmallVector<uint64_t, 8> ProcResourceMasks;
+
+ DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
+ DenseMap<const MCInst *, std::unique_ptr<const InstrDesc>> VariantDescriptors;
+
+ bool FirstCallInst;
+ bool FirstReturnInst;
+
+ Expected<const InstrDesc &> createInstrDescImpl(const MCInst &MCI);
+ Expected<const InstrDesc &> getOrCreateInstrDesc(const MCInst &MCI);
+
+ InstrBuilder(const InstrBuilder &) = delete;
+ InstrBuilder &operator=(const InstrBuilder &) = delete;
+
+ void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+ void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+ Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const;
+
+public:
+ InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
+ const MCRegisterInfo &RI, const MCInstrAnalysis *IA);
+
+ void clear() { // Only VariantDescriptors is dropped; Descriptors is kept.
+ VariantDescriptors.shrink_and_clear();
+ FirstCallInst = true;
+ FirstReturnInst = true;
+ }
+
+ Expected<std::unique_ptr<Instruction>> createInstruction(const MCInst &MCI);
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRBUILDER_H
diff --git a/linux-x64/clang/include/llvm/MCA/Instruction.h b/linux-x64/clang/include/llvm/MCA/Instruction.h
new file mode 100644
index 0000000..658b7fe
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Instruction.h
@@ -0,0 +1,570 @@
+//===--------------------- Instruction.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines abstractions used by the Pipeline to model register reads,
+/// register writes and instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRUCTION_H
+#define LLVM_MCA_INSTRUCTION_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+
+#ifndef NDEBUG
+#include "llvm/Support/raw_ostream.h"
+#endif
+
+#include <memory>
+
+namespace llvm {
+
+namespace mca {
+
+constexpr int UNKNOWN_CYCLES = -512;
+
+/// A register write descriptor.
+struct WriteDescriptor {
+ // Operand index. The index is negative for implicit writes only.
+ // For implicit writes, the actual operand index is computed performing
+ // a bitwise not of the OpIndex.
+ int OpIndex;
+ // Write latency. Number of cycles before write-back stage.
+ unsigned Latency;
+ // This field is set to a value different than zero only if this
+ // is an implicit definition.
+ unsigned RegisterID;
+ // Instruction itineraries would set this field to the SchedClass ID.
+ // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry
+ // element associated to this write.
+ // When computing read latencies, this value is matched against the
+ // "ReadAdvance" information. The hardware backend may implement
+ // dedicated forwarding paths to quickly propagate write results to dependent
+ // instructions waiting in the reservation station (effectively bypassing the
+ // write-back stage).
+ unsigned SClassOrWriteResourceID;
+ // True only if this is a write obtained from an optional definition.
+ // Optional definitions are allowed to reference regID zero (i.e. "no
+ // register").
+ bool IsOptionalDef;
+
+ bool isImplicitWrite() const { return OpIndex < 0; };
+};
+
+/// A register read descriptor.
+struct ReadDescriptor {
+ // A MCOperand index. This is used by the Dispatch logic to identify register
+ // reads. Implicit reads have negative indices. The actual operand index of an
+ // implicit read is the bitwise not of field OpIndex.
+ int OpIndex;
+ // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit
+ // uses always come first in the sequence of uses.
+ unsigned UseIndex;
+ // This field is only set if this is an implicit read.
+ unsigned RegisterID;
+ // Scheduling Class Index. It is used to query the scheduling model for the
+ // MCSchedClassDesc object.
+ unsigned SchedClassID;
+
+ bool isImplicitRead() const { return OpIndex < 0; };
+};
+
+class ReadState;
+
+/// Tracks uses of a register definition (e.g. register write).
+///
+/// Each implicit/explicit register write is associated with an instance of
+/// this class. A WriteState object tracks the dependent users of a
+/// register write. It also tracks how many cycles are left before the write
+/// back stage.
+class WriteState {
+ const WriteDescriptor *WD;
+ // On instruction issue, this field is set equal to the write latency.
+ // Before instruction issue, this field defaults to UNKNOWN_CYCLES (-512), a
+ // special value that represents an "unknown" number of cycles.
+ int CyclesLeft;
+
+ // Actual register defined by this write. This field is only used
+ // to speedup queries on the register file.
+ // For implicit writes, this field always matches the value of
+ // field RegisterID from WD.
+ unsigned RegisterID;
+
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
+
+ // True if this write implicitly clears the upper portion of RegisterID's
+ // super-registers.
+ bool ClearsSuperRegs;
+
+ // True if this write is from a dependency breaking zero-idiom instruction.
+ bool WritesZero;
+
+ // True if this write has been eliminated at register renaming stage.
+ // Example: a register move doesn't consume scheduler/pipeline resources if
+ // it is eliminated at register renaming stage. It still consumes
+ // decode bandwidth, and ROB entries.
+ bool IsEliminated;
+
+ // This field is set if this is a partial register write, and it has a false
+ // dependency on any previous write of the same register (or a portion of it).
+ // DependentWrite must be able to complete before this write completes, so
+ // that we don't break the WAW, and the two writes can be merged together.
+ const WriteState *DependentWrite;
+
+ // A partial write that is in a false dependency with this write.
+ WriteState *PartialWrite;
+
+ unsigned DependentWriteCyclesLeft;
+
+ // A list of dependent reads. Users is a set of dependent
+ // reads. A dependent read is added to the set only if CyclesLeft
+ // is "unknown". As soon as CyclesLeft is 'known', each user in the set
+ // gets notified with the actual CyclesLeft.
+
+ // The 'second' element of a pair is a "ReadAdvance" number of cycles.
+ SmallVector<std::pair<ReadState *, int>, 4> Users;
+
+public:
+ WriteState(const WriteDescriptor &Desc, unsigned RegID,
+ bool clearsSuperRegs = false, bool writesZero = false)
+ : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
+ ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
+ IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
+ DependentWriteCyclesLeft(0) {}
+
+ WriteState(const WriteState &Other) = default;
+ WriteState &operator=(const WriteState &Other) = default;
+
+ int getCyclesLeft() const { return CyclesLeft; }
+ unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
+ unsigned getRegisterID() const { return RegisterID; }
+ unsigned getRegisterFileID() const { return PRFID; }
+ unsigned getLatency() const { return WD->Latency; }
+
+ void addUser(ReadState *Use, int ReadAdvance);
+ void addUser(WriteState *Use);
+
+ unsigned getDependentWriteCyclesLeft() const {
+ return DependentWriteCyclesLeft;
+ }
+
+ unsigned getNumUsers() const {
+ unsigned NumUsers = Users.size();
+ if (PartialWrite)
+ ++NumUsers;
+ return NumUsers;
+ }
+
+ bool clearsSuperRegisters() const { return ClearsSuperRegs; }
+ bool isWriteZero() const { return WritesZero; }
+ bool isEliminated() const { return IsEliminated; }
+
+ bool isReady() const {
+ if (getDependentWrite())
+ return false;
+ unsigned CyclesLeft = getDependentWriteCyclesLeft(); // Shadows the member.
+ return !CyclesLeft || CyclesLeft < getLatency();
+ }
+
+ bool isExecuted() const {
+ return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
+ }
+
+ const WriteState *getDependentWrite() const { return DependentWrite; }
+ void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
+ void writeStartEvent(unsigned Cycles) {
+ DependentWriteCyclesLeft = Cycles;
+ DependentWrite = nullptr;
+ }
+
+ void setWriteZero() { WritesZero = true; }
+ void setEliminated() {
+ assert(Users.empty() && "Write is in an inconsistent state.");
+ CyclesLeft = 0;
+ IsEliminated = true;
+ }
+
+ void setPRF(unsigned PRF) { PRFID = PRF; }
+
+ // On every cycle, update CyclesLeft and notify dependent users.
+ void cycleEvent();
+ void onInstructionIssued();
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+/// Tracks register operand latency in cycles.
+///
+/// A read may be dependent on more than one write. This occurs when some
+/// writes only partially update the register associated to this read.
+class ReadState {
+ const ReadDescriptor *RD;
+ // Physical register identifier associated with this read.
+ unsigned RegisterID;
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
+ // Number of writes that contribute to the definition of RegisterID.
+ // In the absence of partial register updates, the number of DependentWrites
+ // cannot be more than one.
+ unsigned DependentWrites;
+ // Number of cycles left before RegisterID can be read. This value depends on
+ // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES.
+ // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of
+ // every dependent write is known.
+ int CyclesLeft;
+ // This field is updated on every writeStartEvent(). When the number of
+ // dependent writes (i.e. field DependentWrites) is zero, this value is
+ // propagated to field CyclesLeft.
+ unsigned TotalCycles;
+ // This field is set to true only if there are no dependent writes, and
+ // there are no `CyclesLeft' to wait.
+ bool IsReady;
+ // True if this is a read from a known zero register.
+ bool IsZero;
+ // True if this register read is from a dependency-breaking instruction.
+ bool IndependentFromDef;
+
+public:
+ ReadState(const ReadDescriptor &Desc, unsigned RegID)
+ : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
+ CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
+ IsZero(false), IndependentFromDef(false) {}
+
+ const ReadDescriptor &getDescriptor() const { return *RD; }
+ unsigned getSchedClass() const { return RD->SchedClassID; }
+ unsigned getRegisterID() const { return RegisterID; }
+ unsigned getRegisterFileID() const { return PRFID; }
+
+ bool isPending() const { return !IndependentFromDef && CyclesLeft > 0; }
+ bool isReady() const { return IsReady; }
+ bool isImplicitRead() const { return RD->isImplicitRead(); }
+
+ bool isIndependentFromDef() const { return IndependentFromDef; }
+ void setIndependentFromDef() { IndependentFromDef = true; }
+
+ void cycleEvent();
+ void writeStartEvent(unsigned Cycles);
+ void setDependentWrites(unsigned Writes) {
+ DependentWrites = Writes;
+ IsReady = !Writes;
+ }
+
+ bool isReadZero() const { return IsZero; }
+ void setReadZero() { IsZero = true; }
+ void setPRF(unsigned ID) { PRFID = ID; }
+};
+
+/// A sequence of cycles [Begin, End); a building block for ranges of cycles.
+/// startsAfter(CS) is true when CS begins at or after this segment's End;
+/// endsBefore(CS) is true when CS ends at or before this segment's Begin.
+class CycleSegment {
+ unsigned Begin; // Inclusive.
+ unsigned End; // Exclusive.
+ bool Reserved; // Resources associated to this segment must be reserved.
+
+public:
+ CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false)
+ : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {}
+
+ bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; }
+ bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; }
+ bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; }
+ bool overlaps(const CycleSegment &CS) const {
+ return !startsAfter(CS) && !endsBefore(CS);
+ }
+ bool isExecuting() const { return Begin == 0 && End != 0; }
+ bool isExecuted() const { return End == 0; }
+ bool operator<(const CycleSegment &Other) const {
+ return Begin < Other.Begin;
+ }
+ CycleSegment &operator--(void) {
+ if (Begin)
+ Begin--;
+ if (End)
+ End--;
+ return *this;
+ }
+
+ bool isValid() const { return Begin <= End; }
+ unsigned size() const { return End - Begin; };
+ void subtract(unsigned Cycles) {
+ assert(End >= Cycles);
+ End -= Cycles;
+ }
+
+ unsigned begin() const { return Begin; }
+ unsigned end() const { return End; }
+ void setEnd(unsigned NewEnd) { End = NewEnd; }
+ bool isReserved() const { return Reserved; }
+ void setReserved() { Reserved = true; }
+};
+
+/// Helper used by class InstrDesc to describe how hardware resources
+/// are used.
+///
+/// This class describes how many resource units of a specific resource kind
+/// (and how many cycles) are "used" by an instruction.
+struct ResourceUsage {
+ CycleSegment CS;
+ unsigned NumUnits;
+ ResourceUsage(CycleSegment Cycles, unsigned Units = 1)
+ : CS(Cycles), NumUnits(Units) {}
+ unsigned size() const { return CS.size(); }
+ bool isReserved() const { return CS.isReserved(); }
+ void setReserved() { CS.setReserved(); }
+};
+
+/// An instruction descriptor
+struct InstrDesc {
+ SmallVector<WriteDescriptor, 4> Writes; // Implicit writes are at the end.
+ SmallVector<ReadDescriptor, 4> Reads; // Implicit reads are at the end.
+
+ // For every resource used by an instruction of this kind, this vector
+ // reports the number of "consumed cycles".
+ SmallVector<std::pair<uint64_t, ResourceUsage>, 4> Resources;
+
+ // A list of buffered resources consumed by this instruction.
+ SmallVector<uint64_t, 4> Buffers;
+
+ unsigned UsedProcResUnits;
+ unsigned UsedProcResGroups;
+
+ unsigned MaxLatency;
+ // Number of MicroOps for this instruction.
+ unsigned NumMicroOps;
+ // SchedClassID used to construct this InstrDesc.
+ // This information is currently used by views to do fast queries on the
+ // subtarget when computing the reciprocal throughput.
+ unsigned SchedClassID;
+
+ bool MayLoad;
+ bool MayStore;
+ bool HasSideEffects;
+ bool BeginGroup;
+ bool EndGroup;
+
+ // True if all buffered resources are in-order, and there is at least one
+ // buffer which is a dispatch hazard (BufferSize = 0).
+ bool MustIssueImmediately;
+
+ // A zero latency instruction doesn't consume any scheduler resources.
+ bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
+
+ InstrDesc() = default;
+ InstrDesc(const InstrDesc &Other) = delete;
+ InstrDesc &operator=(const InstrDesc &Other) = delete;
+};
+
+/// Base class for instructions consumed by the simulation pipeline.
+///
+/// This class tracks data dependencies as well as generic properties
+/// of the instruction.
+class InstructionBase {
+ const InstrDesc &Desc;
+
+ // This field is set for instructions that are candidates for move
+ // elimination. For more information about move elimination, see the
+ // definition of RegisterMappingTracker in RegisterFile.h
+ bool IsOptimizableMove;
+
+ // Output dependencies.
+ // One entry per each implicit and explicit register definition.
+ SmallVector<WriteState, 4> Defs;
+
+ // Input dependencies.
+ // One entry per each implicit and explicit register use.
+ SmallVector<ReadState, 4> Uses;
+
+public:
+ InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
+
+ SmallVectorImpl<WriteState> &getDefs() { return Defs; }
+ const ArrayRef<WriteState> getDefs() const { return Defs; }
+ SmallVectorImpl<ReadState> &getUses() { return Uses; }
+ const ArrayRef<ReadState> getUses() const { return Uses; }
+ const InstrDesc &getDesc() const { return Desc; }
+
+ unsigned getLatency() const { return Desc.MaxLatency; }
+
+ bool hasDependentUsers() const {
+ return any_of(Defs,
+ [](const WriteState &Def) { return Def.getNumUsers() > 0; });
+ }
+
+ unsigned getNumUsers() const {
+ unsigned NumUsers = 0;
+ for (const WriteState &Def : Defs)
+ NumUsers += Def.getNumUsers();
+ return NumUsers;
+ }
+
+ // Returns true if this instruction is a candidate for move elimination.
+ bool isOptimizableMove() const { return IsOptimizableMove; }
+ void setOptimizableMove() { IsOptimizableMove = true; }
+};
+
+/// An instruction propagated through the simulated instruction pipeline.
+///
+/// This class is used to monitor changes to the internal state of instructions
+/// that are sent to the various components of the simulated hardware pipeline.
+class Instruction : public InstructionBase {
+ enum InstrStage {
+ IS_INVALID, // Instruction in an invalid state.
+ IS_DISPATCHED, // Instruction dispatched but operands are not ready.
+ IS_PENDING, // Instruction is not ready, but operand latency is known.
+ IS_READY, // Instruction dispatched and operands ready.
+ IS_EXECUTING, // Instruction issued.
+ IS_EXECUTED, // Instruction executed. Values are written back.
+ IS_RETIRED // Instruction retired.
+ };
+
+ // The current instruction stage.
+ enum InstrStage Stage;
+
+ // This value defaults to the instruction latency. This instruction is
+ // considered executed when field CyclesLeft goes to zero.
+ int CyclesLeft;
+
+ // Retire Unit token ID for this instruction.
+ unsigned RCUTokenID;
+
+public:
+ Instruction(const InstrDesc &D)
+ : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
+ RCUTokenID(0) {}
+
+ unsigned getRCUTokenID() const { return RCUTokenID; }
+ int getCyclesLeft() const { return CyclesLeft; }
+
+ // Transition to the dispatch stage, and assign a RCUToken to this
+ // instruction. The RCUToken is used to track the completion of every
+ // register write performed by this instruction.
+ void dispatch(unsigned RCUTokenID);
+
+ // Instruction issued. Transition to the IS_EXECUTING state, and update
+ // all the definitions.
+ void execute();
+
+ // Force a transition from the IS_DISPATCHED state to the IS_READY or
+ // IS_PENDING state. State transitions normally occur either at the beginning
+ // of a new cycle (see method cycleEvent()), or as a result of another issue
+ // event. This method is called every time the instruction might have changed
+ // in state. It internally delegates to method updateDispatched() and
+ // updatePending().
+ void update();
+ bool updateDispatched();
+ bool updatePending();
+
+ bool isDispatched() const { return Stage == IS_DISPATCHED; }
+ bool isPending() const { return Stage == IS_PENDING; }
+ bool isReady() const { return Stage == IS_READY; }
+ bool isExecuting() const { return Stage == IS_EXECUTING; }
+ bool isExecuted() const { return Stage == IS_EXECUTED; }
+ bool isRetired() const { return Stage == IS_RETIRED; }
+
+ bool isEliminated() const {
+ return isReady() && getDefs().size() &&
+ all_of(getDefs(),
+ [](const WriteState &W) { return W.isEliminated(); });
+ }
+
+ // Forces a transition from state IS_DISPATCHED to state IS_EXECUTED.
+ void forceExecuted();
+
+ void retire() {
+ assert(isExecuted() && "Instruction is in an invalid state!");
+ Stage = IS_RETIRED;
+ }
+
+ void cycleEvent();
+};
+
+/// An InstRef contains both a SourceMgr index and Instruction pair. The index
+/// is used as a unique identifier for the instruction. MCA will make use of
+/// this index as a key throughout MCA.
+class InstRef {
+ std::pair<unsigned, Instruction *> Data;
+
+public:
+ InstRef() : Data(std::make_pair(0, nullptr)) {}
+ InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
+
+ bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+
+ unsigned getSourceIndex() const { return Data.first; }
+ Instruction *getInstruction() { return Data.second; }
+ const Instruction *getInstruction() const { return Data.second; }
+
+ /// Returns true if this references a valid instruction.
+ operator bool() const { return Data.second != nullptr; }
+
+ /// Invalidate this reference.
+ void invalidate() { Data.second = nullptr; }
+
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const { OS << getSourceIndex(); }
+#endif
+};
+
+#ifndef NDEBUG
+inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) {
+ IR.print(OS);
+ return OS;
+}
+#endif
+
+/// A reference to a register write.
+///
+/// This class is mainly used by the register file to describe register
+/// mappings. It correlates a register write to the source index of the
+/// defining instruction.
+class WriteRef {
+ std::pair<unsigned, WriteState *> Data;
+ static const unsigned INVALID_IID;
+
+public:
+ WriteRef() : Data(INVALID_IID, nullptr) {}
+ WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {}
+
+ unsigned getSourceIndex() const { return Data.first; }
+ const WriteState *getWriteState() const { return Data.second; }
+ WriteState *getWriteState() { return Data.second; }
+ void invalidate() { Data.second = nullptr; }
+ bool isWriteZero() const {
+ assert(isValid() && "Invalid null WriteState found!");
+ return getWriteState()->isWriteZero();
+ }
+
+ /// Returns true if the register value is available to users: the write has
+ /// executed or its WriteState is null. False for source index INVALID_IID.
+ bool isAvailable() const {
+ if (getSourceIndex() == INVALID_IID)
+ return false;
+ const WriteState *WS = getWriteState();
+ return !WS || WS->isExecuted();
+ }
+
+ bool isValid() const { return Data.first != INVALID_IID && Data.second; }
+ bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRUCTION_H
diff --git a/linux-x64/clang/include/llvm/MCA/Pipeline.h b/linux-x64/clang/include/llvm/MCA/Pipeline.h
new file mode 100644
index 0000000..935033f
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Pipeline.h
@@ -0,0 +1,78 @@
+//===--------------------- Pipeline.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements an ordered container of stages that simulate the
+/// pipeline of a hardware backend.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_PIPELINE_H
+#define LLVM_MCA_PIPELINE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+class HWEventListener;
+
+/// A pipeline for a specific subtarget.
+///
+/// It emulates an out-of-order execution of instructions. Instructions are
+/// fetched from a MCInst sequence managed by an initial 'Fetch' stage.
+/// Instructions are firstly fetched, then dispatched to the schedulers, and
+/// then executed.
+///
+/// This class tracks the lifetime of an instruction from the moment where
+/// it gets dispatched to the schedulers, to the moment where it finishes
+/// executing and register writes are architecturally committed.
+/// In particular, it monitors changes in the state of every instruction
+/// in flight.
+///
+/// Instructions are executed in a loop of iterations. The number of iterations
+/// is defined by the SourceMgr object, which is managed by the initial stage
+/// of the instruction pipeline.
+///
+/// The Pipeline entry point is method 'run()' which executes cycles in a loop
+/// for as long as there are new instructions to dispatch, or instructions
+/// that have not been retired yet.
+///
+/// Internally, the Pipeline collects statistical information in the form of
+/// histograms. For example, it tracks how the dispatch group size changes
+/// over time.
+class Pipeline {
+ Pipeline(const Pipeline &P) = delete;
+ Pipeline &operator=(const Pipeline &P) = delete;
+
+ /// An ordered list of stages that define this instruction pipeline.
+ SmallVector<std::unique_ptr<Stage>, 8> Stages;
+ std::set<HWEventListener *> Listeners;
+ unsigned Cycles;
+
+ Error runCycle();
+ bool hasWorkToProcess();
+ void notifyCycleBegin();
+ void notifyCycleEnd();
+
+public:
+ Pipeline() : Cycles(0) {}
+ void appendStage(std::unique_ptr<Stage> S);
+
+ /// Returns the total number of simulated cycles.
+ Expected<unsigned> run();
+
+ void addEventListener(HWEventListener *Listener);
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_PIPELINE_H
diff --git a/linux-x64/clang/include/llvm/MCA/SourceMgr.h b/linux-x64/clang/include/llvm/MCA/SourceMgr.h
new file mode 100644
index 0000000..dbe31db
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/SourceMgr.h
@@ -0,0 +1,56 @@
+//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements class SourceMgr. Class SourceMgr abstracts the input
+/// code sequence (a sequence of MCInst), and assigns unique identifiers to
+/// every instruction in the sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SOURCEMGR_H
+#define LLVM_MCA_SOURCEMGR_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace mca {
+
+class Instruction;
+
+typedef std::pair<unsigned, const Instruction &> SourceRef;
+
+class SourceMgr {
+ using UniqueInst = std::unique_ptr<Instruction>;
+ ArrayRef<UniqueInst> Sequence;
+ unsigned Current; // Next instruction index, counted across all iterations.
+ const unsigned Iterations;
+ static const unsigned DefaultIterations = 100; // Used when Iter is zero.
+
+public:
+ SourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
+ : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
+
+ unsigned getNumIterations() const { return Iterations; }
+ unsigned size() const { return Sequence.size(); }
+ bool hasNext() const { return Current < (Iterations * Sequence.size()); }
+ void updateNext() { ++Current; }
+
+ SourceRef peekNext() const {
+ assert(hasNext() && "Already at end of sequence!");
+ return SourceRef(Current, *Sequence[Current % Sequence.size()]);
+ }
+
+ using const_iterator = ArrayRef<UniqueInst>::const_iterator;
+ const_iterator begin() const { return Sequence.begin(); }
+ const_iterator end() const { return Sequence.end(); }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SOURCEMGR_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h b/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h
new file mode 100644
index 0000000..e39f03e
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h
@@ -0,0 +1,90 @@
+//===----------------------- DispatchStage.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file models the dispatch component of an instruction pipeline.
+///
+/// The DispatchStage is responsible for updating instruction dependencies
+/// and communicating to the simulated instruction scheduler that an instruction
+/// is ready to be scheduled for execution.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_DISPATCH_STAGE_H
+#define LLVM_MCA_DISPATCH_STAGE_H
+
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+// Implements the hardware dispatch logic.
+//
+// This class is responsible for the dispatch stage, in which instructions are
+// dispatched in groups to the Scheduler. An instruction can be dispatched if
+// the following conditions are met:
+// 1) There are enough entries in the reorder buffer (see class
+// RetireControlUnit) to write the opcodes associated with the instruction.
+// 2) There are enough physical registers to rename output register operands.
+// 3) There are enough entries available in the used buffered resource(s).
+//
+// The number of micro opcodes that can be dispatched in one cycle is limited by
+// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
+// processor resources are not available. Dispatch stall events are counted
+// during the entire execution of the code, and displayed by the performance
+// report when flag '-dispatch-stats' is specified.
+//
+// If the number of micro opcodes exceeds DispatchWidth, then the instruction
+// is dispatched in multiple cycles.
+class DispatchStage final : public Stage {
+ unsigned DispatchWidth;
+ unsigned AvailableEntries;
+ unsigned CarryOver;
+ InstRef CarriedOver;
+ const MCSubtargetInfo &STI;
+ RetireControlUnit &RCU;
+ RegisterFile &PRF;
+
+ bool checkRCU(const InstRef &IR) const;
+ bool checkPRF(const InstRef &IR) const;
+ bool canDispatch(const InstRef &IR) const;
+ Error dispatch(InstRef IR);
+
+ void notifyInstructionDispatched(const InstRef &IR,
+ ArrayRef<unsigned> UsedPhysRegs,
+ unsigned uOps) const;
+
+public:
+ DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
+ unsigned MaxDispatchWidth, RetireControlUnit &R,
+ RegisterFile &F)
+ : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+ CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
+
+ bool isAvailable(const InstRef &IR) const override;
+
+ // The dispatch logic internally doesn't buffer instructions. So there is
+ // never work to do at the beginning of every cycle.
+ bool hasWorkToComplete() const override { return false; }
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_DISPATCH_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/EntryStage.h b/linux-x64/clang/include/llvm/MCA/Stages/EntryStage.h
new file mode 100644
index 0000000..59a2daf
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/EntryStage.h
@@ -0,0 +1,51 @@
+//===---------------------- EntryStage.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Entry stage of an instruction pipeline. Its sole
+/// purpose in life is to pick instructions in sequence and move them to the
+/// next pipeline stage.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_ENTRY_STAGE_H
+#define LLVM_MCA_ENTRY_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class EntryStage final : public Stage {
+ InstRef CurrentInstruction; // Set by getNextInstruction().
+ SmallVector<std::unique_ptr<Instruction>, 16> Instructions; // Owning storage.
+ SourceMgr &SM; // Held by reference.
+ unsigned NumRetired; // Initialized to 0 by the constructor.
+
+ // Updates the program counter, and sets 'CurrentInstruction'.
+ void getNextInstruction();
+
+ EntryStage(const EntryStage &Other) = delete; // Non-copyable.
+ EntryStage &operator=(const EntryStage &Other) = delete;
+
+public:
+ EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { }
+
+ bool isAvailable(const InstRef &IR) const override;
+ bool hasWorkToComplete() const override;
+ Error execute(InstRef &IR) override;
+ Error cycleStart() override;
+ Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_ENTRY_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/ExecuteStage.h b/linux-x64/clang/include/llvm/MCA/Stages/ExecuteStage.h
new file mode 100644
index 0000000..ec9eae0
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/ExecuteStage.h
@@ -0,0 +1,79 @@
+//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the execution stage of a default instruction pipeline.
+///
+/// The ExecuteStage is responsible for managing the hardware scheduler
+/// and issuing notifications that an instruction has been executed.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_EXECUTE_STAGE_H
+#define LLVM_MCA_EXECUTE_STAGE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class ExecuteStage final : public Stage {
+ Scheduler &HWS; // The hardware scheduler managed by this stage (by reference).
+
+ Error issueInstruction(InstRef &IR);
+
+ // Called at the beginning of each cycle to issue already dispatched
+ // instructions to the underlying pipelines.
+ Error issueReadyInstructions();
+
+ // Handles instructions that were eliminated at the register renaming stage.
+ Error handleInstructionEliminated(InstRef &IR);
+
+ ExecuteStage(const ExecuteStage &Other) = delete; // Non-copyable.
+ ExecuteStage &operator=(const ExecuteStage &Other) = delete;
+
+public:
+ ExecuteStage(Scheduler &S) : Stage(), HWS(S) {}
+
+ // This stage works under the assumption that the Pipeline will eventually
+ // execute a retire stage. We don't need to check if pipelines and/or
+ // schedulers have instructions to process, because those instructions are
+ // also tracked by the retire control unit. That means the retire stage's
+ // hasWorkToComplete() is responsible for checking if there are still
+ // instructions in-flight in the out-of-order backend.
+ bool hasWorkToComplete() const override { return false; }
+ bool isAvailable(const InstRef &IR) const override;
+
+ // Notifies the scheduler that a new cycle just started.
+ //
+ // Besides forwarding the new-cycle notification to the scheduler, this
+ // method is responsible for notifying listeners about instruction state
+ // changes and about processor resources freed by the scheduler.
+ // Instructions that transitioned to the 'Executed' state are automatically
+ // moved to the next stage (i.e. RetireStage).
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
+
+ void notifyInstructionIssued(
+ const InstRef &IR,
+ MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
+ void notifyInstructionExecuted(const InstRef &IR) const;
+ void notifyInstructionReady(const InstRef &IR) const;
+ void notifyResourceAvailable(const ResourceRef &RR) const;
+
+ // Notify listeners that buffered resources have been consumed or freed.
+ void notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_EXECUTE_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/InstructionTables.h b/linux-x64/clang/include/llvm/MCA/Stages/InstructionTables.h
new file mode 100644
index 0000000..4b463c9
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/InstructionTables.h
@@ -0,0 +1,45 @@
+//===--------------------- InstructionTables.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a custom stage to generate instruction tables.
+/// See the description of command-line flag -instruction-tables in
+/// docs/CommandGuide/llvm-mca.rst
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRUCTIONTABLES_H
+#define LLVM_MCA_INSTRUCTIONTABLES_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+class InstructionTables final : public Stage {
+ const MCSchedModel &SM; // Scheduling model; held by reference.
+ SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
+ SmallVector<uint64_t, 8> Masks; // One entry per processor resource kind.
+
+public:
+ InstructionTables(const MCSchedModel &Model)
+ : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) {
+ computeProcResourceMasks(Model, Masks); // Fills Masks from the model.
+ }
+
+ bool hasWorkToComplete() const override { return false; } // Never pending.
+ Error execute(InstRef &IR) override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRUCTIONTABLES_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h b/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h
new file mode 100644
index 0000000..08c216a
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h
@@ -0,0 +1,47 @@
+//===---------------------- RetireStage.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the retire stage of a default instruction pipeline.
+/// The RetireStage represents the process logic that interacts with the
+/// simulated RetireControlUnit hardware.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RETIRE_STAGE_H
+#define LLVM_MCA_RETIRE_STAGE_H
+
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class RetireStage final : public Stage {
+ // Simulated hardware units used by this stage; both are held by reference.
+ RetireControlUnit &RCU;
+ RegisterFile &PRF;
+
+ RetireStage(const RetireStage &Other) = delete; // Non-copyable.
+ RetireStage &operator=(const RetireStage &Other) = delete;
+
+public:
+ RetireStage(RetireControlUnit &R, RegisterFile &F)
+ : Stage(), RCU(R), PRF(F) {}
+
+ bool hasWorkToComplete() const override { return !RCU.isEmpty(); } // RCU busy.
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
+ void notifyInstructionRetired(const InstRef &IR) const;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RETIRE_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/Stage.h b/linux-x64/clang/include/llvm/MCA/Stages/Stage.h
new file mode 100644
index 0000000..46b242c
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/Stage.h
@@ -0,0 +1,87 @@
+//===---------------------- Stage.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a stage.
+/// A chain of stages composes an instruction pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_STAGE_H
+#define LLVM_MCA_STAGE_H
+
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Error.h"
+#include <set>
+
+namespace llvm {
+namespace mca {
+
+class InstRef;
+
+class Stage {
+ Stage *NextInSequence; // nullptr until setNextInSequence() is called.
+ std::set<HWEventListener *> Listeners; // Recipients of notifyEvent().
+
+ Stage(const Stage &Other) = delete; // Stages are non-copyable.
+ Stage &operator=(const Stage &Other) = delete;
+
+protected:
+ const std::set<HWEventListener *> &getListeners() const { return Listeners; }
+
+public:
+ Stage() : NextInSequence(nullptr) {}
+ virtual ~Stage();
+
+ /// Returns true if it can execute IR during this cycle (default: always).
+ virtual bool isAvailable(const InstRef &IR) const { return true; }
+
+ /// Returns true if some instructions are still executing this stage.
+ virtual bool hasWorkToComplete() const = 0;
+
+ /// Called once at the start of each cycle. This can be used as a setup
+ /// phase to prepare for the executions during the cycle.
+ virtual Error cycleStart() { return ErrorSuccess(); } // Default: no-op.
+
+ /// Called once at the end of each cycle.
+ virtual Error cycleEnd() { return ErrorSuccess(); } // Default: no-op.
+
+ /// The primary action that this stage performs on instruction IR.
+ virtual Error execute(InstRef &IR) = 0;
+
+ void setNextInSequence(Stage *NextStage) { // May be set only once (asserted).
+ assert(!NextInSequence && "This stage already has a NextInSequence!");
+ NextInSequence = NextStage;
+ }
+
+ bool checkNextStage(const InstRef &IR) const { // False if no next stage.
+ return NextInSequence && NextInSequence->isAvailable(IR);
+ }
+
+ /// Called when an instruction is ready to move to the next pipeline stage.
+ ///
+ /// Stages are responsible for moving instructions to their immediate
+ /// successor stages. checkNextStage(IR) must hold; it is only asserted.
+ Error moveToTheNextStage(InstRef &IR) {
+ assert(checkNextStage(IR) && "Next stage is not ready!");
+ return NextInSequence->execute(IR);
+ }
+
+ /// Add a listener to receive callbacks during the execution of this stage.
+ void addListener(HWEventListener *Listener);
+
+ /// Notify listeners of a particular hardware event.
+ template <typename EventT> void notifyEvent(const EventT &Event) const {
+ for (HWEventListener *Listener : Listeners)
+ Listener->onEvent(Event);
+ }
+};
+
+} // namespace mca
+} // namespace llvm
+#endif // LLVM_MCA_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Support.h b/linux-x64/clang/include/llvm/MCA/Support.h
new file mode 100644
index 0000000..fc36ed4
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Support.h
@@ -0,0 +1,107 @@
+//===--------------------- Support.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Helper functions used by various pipeline components.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SUPPORT_H
+#define LLVM_MCA_SUPPORT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+template <typename T>
+class InstructionError : public ErrorInfo<InstructionError<T>> {
+public:
+ static char ID; // Defined after the class; one definition per T.
+ std::string Message; // Text written by log().
+ const T &Inst; // Stored by reference: the referent must outlive this error.
+
+ InstructionError(std::string M, const T &MCI)
+ : Message(std::move(M)), Inst(MCI) {}
+
+ void log(raw_ostream &OS) const override { OS << Message; }
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode(); // No std::error_code equivalent is given.
+ }
+};
+
+template <typename T> char InstructionError<T>::ID;
+
+/// This class represents the number of cycles per resource (fractions of
+/// cycles). That quantity is managed here as a ratio, and accessed via the
+/// double cast-operator below. The two quantities, number of cycles and
+/// number of resources, are kept separate. This is used by the
+/// ResourcePressureView to calculate the average resource cycles
+/// per instruction/iteration.
+class ResourceCycles {
+ unsigned Numerator, Denominator; // Cycles and resource units, respectively.
+
+public:
+ ResourceCycles() : Numerator(0), Denominator(1) {} // Represents 0 cycles.
+ ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1)
+ : Numerator(Cycles), Denominator(ResourceUnits) {} // Zero units unchecked.
+
+ operator double() const {
+ assert(Denominator && "Invalid denominator (must be non-zero).");
+ return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
+ }
+
+ unsigned getNumerator() const { return Numerator; }
+ unsigned getDenominator() const { return Denominator; }
+
+ // Add the components of RHS to this instance. Instead of calculating
+ // the final value here, we keep track of the numerator and denominator
+ // separately, to reduce floating point error.
+ ResourceCycles &operator+=(const ResourceCycles &RHS);
+};
+
+/// Populates vector Masks with processor resource masks.
+///
+/// The number of bits set in a mask depends on the processor resource type.
+/// Each processor resource mask has at least one bit set. For groups, the
+/// number of bits set in the mask is equal to the cardinality of the group plus
+/// one. Excluding the most significant bit, the remaining bits in the mask
+/// identify processor resources that are part of the group.
+///
+/// Example:
+///
+/// ResourceA -- Mask: 0b001
+/// ResourceB -- Mask: 0b010
+/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
+///
+/// ResourceAB is a processor resource group containing ResourceA and ResourceB.
+/// Each resource mask uniquely identifies a resource; both ResourceA and
+/// ResourceB only have one bit set.
+/// ResourceAB is a group; excluding the most significant bit in the mask, the
+/// remaining bits identify the composition of the group.
+///
+/// Resource masks are used by the ResourceManager to solve set membership
+/// problems with simple bit manipulation operations.
+void computeProcResourceMasks(const MCSchedModel &SM,
+ MutableArrayRef<uint64_t> Masks); // Output; presumably one per resource kind.
+
+/// Compute the reciprocal block throughput from a set of processor resource
+/// cycles. The reciprocal block throughput is computed as the maximum of:
+/// - NumMicroOps / DispatchWidth
+/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource).
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+ unsigned NumMicroOps,
+ ArrayRef<unsigned> ProcResourceUsage); // Presumably per-resource cycles.
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SUPPORT_H