Update prebuilt Clang to match Android kernel.

Bug: 132428451
Change-Id: I8f6e2cb23f381fc0c02ddea99b867e58e925e5be
diff --git a/linux-x64/clang/include/llvm/MCA/Context.h b/linux-x64/clang/include/llvm/MCA/Context.h
new file mode 100644
index 0000000..a9f3e05
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Context.h
@@ -0,0 +1,68 @@
+//===---------------------------- Context.h ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a class for holding ownership of various simulated
+/// hardware units.  A Context also provides a utility routine for constructing
+/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
+/// stages.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_CONTEXT_H
+#define LLVM_MCA_CONTEXT_H
+
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/SourceMgr.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+/// This is a convenience struct to hold the parameters necessary for creating
+/// the pre-built "default" out-of-order pipeline.
+struct PipelineOptions {
+  PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS,
+                  bool NoAlias)
+      : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS),
+        StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {}
+  unsigned DispatchWidth;
+  unsigned RegisterFileSize;
+  unsigned LoadQueueSize;
+  unsigned StoreQueueSize;
+  bool AssumeNoAlias;
+};
+
+class Context {
+  SmallVector<std::unique_ptr<HardwareUnit>, 4> Hardware;
+  const MCRegisterInfo &MRI;
+  const MCSubtargetInfo &STI;
+
+public:
+  Context(const MCRegisterInfo &R, const MCSubtargetInfo &S) : MRI(R), STI(S) {}
+  Context(const Context &C) = delete;
+  Context &operator=(const Context &C) = delete;
+
+  void addHardwareUnit(std::unique_ptr<HardwareUnit> H) {
+    Hardware.push_back(std::move(H));
+  }
+
+  /// Construct a basic pipeline for simulating an out-of-order pipeline.
+  /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages.
+  std::unique_ptr<Pipeline> createDefaultPipeline(const PipelineOptions &Opts,
+                                                  InstrBuilder &IB,
+                                                  SourceMgr &SrcMgr);
+};
+
+} // namespace mca
+} // namespace llvm
+#endif // LLVM_MCA_CONTEXT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HWEventListener.h b/linux-x64/clang/include/llvm/MCA/HWEventListener.h
new file mode 100644
index 0000000..1857ad2
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HWEventListener.h
@@ -0,0 +1,155 @@
+//===----------------------- HWEventListener.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the main interface for hardware event listeners.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_HWEVENTLISTENER_H
+#define LLVM_MCA_HWEVENTLISTENER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+// An HWInstructionEvent represents state changes of instructions that
+// listeners might be interested in. Listeners can choose to ignore any event
+// they are not interested in.
+class HWInstructionEvent {
+public:
+  // This is the list of event types that are shared by all targets, that
+  // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent,
+  // ...) and generic Views can manipulate.
+  // Subtargets are free to define additional event types, that are going to be
+  // handled by generic components as opaque values, but can still be
+  // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage,
+  // DispatchStage, ...) and interpreted by subtarget-specific EventListener
+  // implementations.
+  enum GenericEventType {
+    Invalid = 0,
+    // Events generated by the Retire Control Unit.
+    Retired,
+    // Events generated by the Scheduler.
+    Ready,
+    Issued,
+    Executed,
+    // Events generated by the Dispatch logic.
+    Dispatched,
+
+    LastGenericEventType,
+  };
+
+  HWInstructionEvent(unsigned type, const InstRef &Inst)
+      : Type(type), IR(Inst) {}
+
+  // The event type. The exact meaning depends on the subtarget.
+  const unsigned Type;
+
+  // The instruction this event was generated for.
+  const InstRef &IR;
+};
+
+class HWInstructionIssuedEvent : public HWInstructionEvent {
+public:
+  using ResourceRef = std::pair<uint64_t, uint64_t>;
+  HWInstructionIssuedEvent(const InstRef &IR,
+                           ArrayRef<std::pair<ResourceRef, ResourceCycles>> UR)
+      : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {}
+
+  ArrayRef<std::pair<ResourceRef, ResourceCycles>> UsedResources;
+};
+
+class HWInstructionDispatchedEvent : public HWInstructionEvent {
+public:
+  HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef<unsigned> Regs,
+                               unsigned UOps)
+      : HWInstructionEvent(HWInstructionEvent::Dispatched, IR),
+        UsedPhysRegs(Regs), MicroOpcodes(UOps) {}
+  // Number of physical registers allocated for this instruction. There is one
+  // entry per register file.
+  ArrayRef<unsigned> UsedPhysRegs;
+  // Number of micro opcodes dispatched.
+  // This field is often set to the total number of micro-opcodes specified by
+  // the instruction descriptor of IR.
+  // The only exception is when IR declares a number of micro opcodes
+  // which exceeds the processor DispatchWidth, and - by construction - it
+  // requires multiple cycles to be fully dispatched. In that particular case,
+  // the dispatch logic would generate more than one dispatch event (one per
+  // cycle), and each event would declare how many micro opcodes have effectively
+  // been dispatched to the schedulers.
+  unsigned MicroOpcodes;
+};
+
+class HWInstructionRetiredEvent : public HWInstructionEvent {
+public:
+  HWInstructionRetiredEvent(const InstRef &IR, ArrayRef<unsigned> Regs)
+      : HWInstructionEvent(HWInstructionEvent::Retired, IR),
+        FreedPhysRegs(Regs) {}
+  // Number of register writes that have been architecturally committed. There
+  // is one entry per register file.
+  ArrayRef<unsigned> FreedPhysRegs;
+};
+
+// A HWStallEvent represents a pipeline stall caused by the lack of hardware
+// resources.
+class HWStallEvent {
+public:
+  enum GenericEventType {
+    Invalid = 0,
+    // Generic stall events generated by the DispatchStage.
+    RegisterFileStall,
+    RetireControlUnitStall,
+    // Generic stall events generated by the Scheduler.
+    DispatchGroupStall,
+    SchedulerQueueFull,
+    LoadQueueFull,
+    StoreQueueFull,
+    LastGenericEvent
+  };
+
+  HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {}
+
+  // The exact meaning of the stall event type depends on the subtarget.
+  const unsigned Type;
+
+  // The instruction this event was generated for.
+  const InstRef &IR;
+};
+
+class HWEventListener {
+public:
+  // Generic events generated by the pipeline.
+  virtual void onCycleBegin() {}
+  virtual void onCycleEnd() {}
+
+  virtual void onEvent(const HWInstructionEvent &Event) {}
+  virtual void onEvent(const HWStallEvent &Event) {}
+
+  using ResourceRef = std::pair<uint64_t, uint64_t>;
+  virtual void onResourceAvailable(const ResourceRef &RRef) {}
+
+  // Events generated by the Scheduler when buffered resources are
+  // consumed/freed for an instruction.
+  virtual void onReservedBuffers(const InstRef &Inst,
+                                 ArrayRef<unsigned> Buffers) {}
+  virtual void onReleasedBuffers(const InstRef &Inst,
+                                 ArrayRef<unsigned> Buffers) {}
+
+  virtual ~HWEventListener() {}
+
+private:
+  virtual void anchor();
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_HWEVENTLISTENER_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/HardwareUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/HardwareUnit.h
new file mode 100644
index 0000000..f6e178b
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/HardwareUnit.h
@@ -0,0 +1,32 @@
+//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a base class for describing a simulated hardware
+/// unit.  These units are used to construct a simulated backend.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_HARDWAREUNIT_H
+#define LLVM_MCA_HARDWAREUNIT_H
+
+namespace llvm {
+namespace mca {
+
+class HardwareUnit {
+  HardwareUnit(const HardwareUnit &H) = delete;
+  HardwareUnit &operator=(const HardwareUnit &H) = delete;
+
+public:
+  HardwareUnit() = default;
+  virtual ~HardwareUnit();
+};
+
+} // namespace mca
+} // namespace llvm
+#endif // LLVM_MCA_HARDWAREUNIT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h
new file mode 100644
index 0000000..b8a9f27
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -0,0 +1,206 @@
+//===------------------------- LSUnit.h --------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load/Store unit class that models load/store queues and that implements
+/// a simple weak memory consistency model.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_LSUNIT_H
+#define LLVM_MCA_LSUNIT_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+
+namespace llvm {
+namespace mca {
+
+class InstRef;
+class Scheduler;
+
+/// A Load/Store Unit implementing load and store queues.
+///
+/// This class implements a load queue and a store queue to emulate the
+/// out-of-order execution of memory operations.
+/// Each load (or store) consumes an entry in the load (or store) queue.
+///
+/// Rules are:
+/// 1) A younger load is allowed to pass an older load only if there are no
+///    stores nor barriers in between the two loads.
+/// 2) A younger store is not allowed to pass an older store.
+/// 3) A younger store is not allowed to pass an older load.
+/// 4) A younger load is allowed to pass an older store only if the load does
+///    not alias with the store.
+///
+/// This class optimistically assumes that loads don't alias store operations.
+/// Under this assumption, younger loads are always allowed to pass older
+/// stores (this would only affect rule 4).
+/// Essentially, this class doesn't perform any sort of alias analysis to
+/// identify aliasing loads and stores.
+///
+/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
+/// set to `false` by the constructor of LSUnit.
+///
+/// Note that this class doesn't know about the existence of different memory
+/// types for memory operations (example: write-through, write-combining, etc.).
+/// Derived classes are responsible for implementing that extra knowledge, and
+/// provide different sets of rules for loads and stores by overriding method
+/// `isReady()`.
+/// To emulate a write-combining memory type, rule 2. must be relaxed in a
+/// derived class to enable the reordering of non-aliasing store operations.
+///
+/// No assumptions are made by this class on the size of the store buffer.  This
+/// class doesn't know how to identify cases where store-to-load forwarding may
+/// occur.
+///
+/// LSUnit doesn't attempt to predict whether a load or store hits or misses
+/// the L1 cache. To be more specific, LSUnit doesn't know anything about
+/// cache hierarchy and memory types.
+/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
+/// scheduling model provides an "optimistic" load-to-use latency (which usually
+/// matches the load-to-use latency for when there is a hit in the L1D).
+/// Derived classes may expand this knowledge.
+///
+/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
+/// memory-barrier like instructions.
+/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
+/// `unmodeled side effects` behaves like a "soft" load-barrier. That means, it
+/// serializes loads without forcing a flush of the load queue.
+/// Similarly, instructions that both `mayStore` and have `unmodeled side
+/// effects` are treated like store barriers. A full memory
+/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
+/// effects. This is obviously inaccurate, but this is the best that we can do
+/// at the moment.
+///
+/// Each load/store barrier consumes one entry in the load/store queue. A
+/// load/store barrier enforces ordering of loads/stores:
+///  - A younger load cannot pass a load barrier.
+///  - A younger store cannot pass a store barrier.
+///
+/// A younger load has to wait for the memory load barrier to execute.
+/// A load/store barrier is "executed" when it becomes the oldest entry in
+/// the load/store queue(s). That also means, all the older loads/stores have
+/// already been executed.
+class LSUnit : public HardwareUnit {
+  // Load queue size.
+  // LQ_Size == 0 means that there are infinite slots in the load queue.
+  unsigned LQ_Size;
+
+  // Store queue size.
+  // SQ_Size == 0 means that there are infinite slots in the store queue.
+  unsigned SQ_Size;
+
+  // If true, loads will never alias with stores. This is the default.
+  bool NoAlias;
+
+  // When a `MayLoad` instruction is dispatched to the schedulers for execution,
+  // the LSUnit reserves an entry in the `LoadQueue` for it.
+  //
+  // LoadQueue keeps track of all the loads that are in-flight. A load
+  // instruction is eventually removed from the LoadQueue when it reaches
+  // completion stage. That means, a load leaves the queue when it is 'executed',
+  // and its value can be forwarded on the data path to outside units.
+  //
+  // This class doesn't know about the latency of a load instruction. So, it
+  // conservatively/pessimistically assumes that the latency of a load opcode
+  // matches the instruction latency.
+  //
+  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
+  // and load/store conflicts, the latency of a load is determined by the depth
+  // of the load pipeline. So, we could use field `LoadLatency` in the
+  // MCSchedModel to model that latency.
+  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
+  // L1D, and it usually already accounts for any extra latency due to data
+  // forwarding.
+  // When doing throughput analysis, `LoadLatency` is likely to
+  // be a better predictor of load latency than instruction latency. This is
+  // particularly true when simulating code with temporal/spatial locality of
+  // memory accesses.
+  // Using `LoadLatency` (instead of the instruction latency) is also expected
+  // to improve the load queue allocation for long latency instructions with
+  // folded memory operands (See PR39829).
+  //
+  // FIXME: On some processors, load/store operations are split into multiple
+  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
+  // not 256-bit data types. So, a 256-bit load is effectively split into two
+  // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
+  // simplicity, this class optimistically assumes that a load instruction only
+  // consumes one entry in the LoadQueue.  Similarly, store instructions only
+  // consume a single entry in the StoreQueue.
+  // In future, we should reassess the quality of this design, and consider
+  // alternative approaches that let instructions specify the number of
+  // load/store queue entries which they consume at dispatch stage (See
+  // PR39830).
+  SmallSet<unsigned, 16> LoadQueue;
+  SmallSet<unsigned, 16> StoreQueue;
+
+  void assignLQSlot(unsigned Index);
+  void assignSQSlot(unsigned Index);
+  bool isReadyNoAlias(unsigned Index) const;
+
+  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
+  // conservatively treated as a store barrier. It forces older stores to be
+  // executed before newer stores are issued.
+  SmallSet<unsigned, 8> StoreBarriers;
+
+  // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
+  // conservatively treated as a load barrier. It forces older loads to execute
+  // before newer loads are issued.
+  SmallSet<unsigned, 8> LoadBarriers;
+
+  bool isSQEmpty() const { return StoreQueue.empty(); }
+  bool isLQEmpty() const { return LoadQueue.empty(); }
+  bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
+  bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
+
+public:
+  LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
+         bool AssumeNoAlias = false);
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+
+  enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
+
+  // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
+  // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
+  Status isAvailable(const InstRef &IR) const;
+
+  // Allocates load/store queue resources for IR.
+  //
+  // This method assumes that a previous call to `isAvailable(IR)` returned
+  // LSU_AVAILABLE, and that IR is a memory operation.
+  void dispatch(const InstRef &IR);
+
+  // By default, rules are:
+  // 1. A store may not pass a previous store.
+  // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
+  // 3. A load may pass a previous load.
+  // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
+  // 5. A load has to wait until an older load barrier is fully executed.
+  // 6. A store has to wait until an older store barrier is fully executed.
+  virtual bool isReady(const InstRef &IR) const;
+
+  // Load and store instructions are tracked by their corresponding queues from
+  // dispatch until the "instruction executed" event.
+  // Only when a load instruction reaches the 'Executed' stage, its value
+  // becomes available to the users. At that point, the load no longer needs to
+  // be tracked by the load queue.
+  // FIXME: For simplicity, we optimistically assume a similar behavior for
+  // store instructions. In practice, store operations don't tend to leave the
+  // store queue until they reach the 'Retired' stage (See PR39830).
+  void onInstructionExecuted(const InstRef &IR);
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_LSUNIT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h
new file mode 100644
index 0000000..3650632
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -0,0 +1,239 @@
+//===--------------------- RegisterFile.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a register mapping file class.  This class is responsible
+/// for managing hardware register files and the tracking of data dependencies
+/// between registers.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_REGISTER_FILE_H
+#define LLVM_MCA_REGISTER_FILE_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+class ReadState;
+class WriteState;
+class WriteRef;
+
+/// Manages hardware register files, and tracks register definitions for
+/// register renaming purposes.
+class RegisterFile : public HardwareUnit {
+  const MCRegisterInfo &MRI;
+
+  // class RegisterMappingTracker is a physical register file (PRF) descriptor.
+  // There is one RegisterMappingTracker for every PRF definition in the
+  // scheduling model.
+  //
+  // An instance of RegisterMappingTracker tracks the number of physical
+  // registers available for renaming. It also tracks the number of register
+  // moves eliminated per cycle.
+  struct RegisterMappingTracker {
+    // The total number of physical registers that are available in this
+    // register file for register renaming purposes.  A value of zero for this
+    // field means: this register file has an unbounded number of physical
+    // registers.
+    const unsigned NumPhysRegs;
+    // Number of physical registers that are currently in use.
+    unsigned NumUsedPhysRegs;
+
+    // Maximum number of register moves that can be eliminated by this PRF every
+    // cycle. A value of zero means that there is no limit in the number of
+    // moves which can be eliminated every cycle.
+    const unsigned MaxMoveEliminatedPerCycle;
+
+    // Number of register moves eliminated during this cycle.
+    //
+    // This value is increased by one every time a register move is eliminated.
+    // Every new cycle, this value is reset to zero.
+    // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if
+    // NumMoveEliminated is less than MaxMoveEliminatedPerCycle.
+    unsigned NumMoveEliminated;
+
+    // If set, move elimination is restricted to zero-register moves only.
+    bool AllowZeroMoveEliminationOnly;
+
+    RegisterMappingTracker(unsigned NumPhysRegisters,
+                           unsigned MaxMoveEliminated = 0U,
+                           bool AllowZeroMoveElimOnly = false)
+        : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0),
+          MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U),
+          AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly) {}
+  };
+
+  // A vector of register file descriptors.  This set always contains at least
+  // one entry. Entry at index #0 is reserved.  That entry describes a register
+  // file with an unbounded number of physical registers that "sees" all the
+  // hardware registers declared by the target (i.e. all the register
+  // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is
+  // the target name).
+  //
+  // Users can limit the number of physical registers that are available in
+  // register file #0 specifying command line flag `-register-file-size=<uint>`.
+  SmallVector<RegisterMappingTracker, 4> RegisterFiles;
+
+  // This type is used to propagate information about the owner of a register,
+  // and the cost of allocating it in the PRF. Register cost is defined as the
+  // number of physical registers consumed by the PRF to allocate a user
+  // register.
+  //
+  // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical
+  // registers. So, the cost of allocating a YMM register in BtVer2 is 2.
+  using IndexPlusCostPairTy = std::pair<unsigned, unsigned>;
+
+  // Struct RegisterRenamingInfo is used to map logical registers to register
+  // files.
+  //
+  // There is a RegisterRenamingInfo object for every logical register defined
+  // by the target. RegisterRenamingInfo objects are stored into vector
+  // `RegisterMappings`, and MCPhysReg IDs can be used to reference
+  // elements in that vector.
+  //
+  // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost`
+  // specifies both the owning PRF, as well as the number of physical registers
+  // consumed at register renaming stage.
+  //
+  // Field `AllowMoveElimination` is set for registers that are used as
+  // destination by optimizable register moves.
+  //
+  // Field `AliasRegID` is set by writes from register moves that have been
+  // eliminated at register renaming stage. A move eliminated at register
+  // renaming stage is effectively bypassed, and its write aliases the source
+  // register definition.
+  struct RegisterRenamingInfo {
+    IndexPlusCostPairTy IndexPlusCost;
+    MCPhysReg RenameAs;
+    MCPhysReg AliasRegID;
+    bool AllowMoveElimination;
+    RegisterRenamingInfo()
+        : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U), AliasRegID(0U),
+          AllowMoveElimination(false) {}
+  };
+
+  // RegisterMapping objects are mainly used to track physical register
+  // definitions and resolve data dependencies.
+  //
+  // Every register declared by the Target is associated with an instance of
+  // RegisterMapping. RegisterMapping objects keep track of writes to a logical
+  // register.  That information is used by class RegisterFile to resolve data
+  // dependencies, and correctly set latencies for register uses.
+  //
+  // This implementation does not allow overlapping register files. The only
+  // register file that is allowed to overlap with other register files is
+  // register file #0. If we exclude register #0, every register is "owned" by
+  // at most one register file.
+  using RegisterMapping = std::pair<WriteRef, RegisterRenamingInfo>;
+
+  // There is one entry per each register defined by the target.
+  std::vector<RegisterMapping> RegisterMappings;
+
+  // Used to track zero registers. There is one bit for each register defined by
+  // the target. Bits are set for registers that are known to be zero.
+  APInt ZeroRegisters;
+
+  // This method creates a new register file descriptor.
+  // The new register file owns all of the registers declared by register
+  // classes in the 'RegisterClasses' set.
+  //
+  // Processor models allow the definition of RegisterFile(s) via tablegen. For
+  // example, this is a tablegen definition for a x86 register file for
+  // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1
+  // physical register).
+  //
+  //    def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]>
+  //
+  // Here FPRegisterFile contains all the registers defined by register class
+  // VR128RegClass and VR256RegClass. FPRegisterFile implements 60
+  // registers which can be used for register renaming purpose.
+  void addRegisterFile(const MCRegisterFileDesc &RF,
+                       ArrayRef<MCRegisterCostEntry> Entries);
+
+  // Consumes physical registers in each register file specified by the
+  // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`.
+  void allocatePhysRegs(const RegisterRenamingInfo &Entry,
+                        MutableArrayRef<unsigned> UsedPhysRegs);
+
+  // Releases previously allocated physical registers from the register file(s).
+  // This method is called from `invalidateRegisterMapping()`.
+  void freePhysRegs(const RegisterRenamingInfo &Entry,
+                    MutableArrayRef<unsigned> FreedPhysRegs);
+
+  // Collects writes that are in a RAW dependency with RS.
+  // This method is called from `addRegisterRead()`.
+  void collectWrites(const ReadState &RS,
+                     SmallVectorImpl<WriteRef> &Writes) const;
+
+  // Create an instance of RegisterMappingTracker for every register file
+  // specified by the processor model.
+  // If no register file is specified, then this method creates a default
+  // register file with an unbounded number of physical registers.
+  void initialize(const MCSchedModel &SM, unsigned NumRegs);
+
+public:
+  RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
+               unsigned NumRegs = 0);
+
+  // This method updates the register mappings inserting a new register
+  // definition. This method is also responsible for updating the number of
+  // allocated physical registers in each register file modified by the write.
+  // No physical register is allocated if this write is from a zero-idiom.
+  void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs);
+
+  // Collect writes that are in a data dependency with RS, and update RS
+  // internal state.
+  void addRegisterRead(ReadState &RS, const MCSubtargetInfo &STI) const;
+
+  // Removes write \param WS from the register mappings.
+  // Physical registers may be released to reflect this update.
+  // No registers are released if this write is from a zero-idiom.
+  void removeRegisterWrite(const WriteState &WS,
+                           MutableArrayRef<unsigned> FreedPhysRegs);
+
+  // Returns true if a move from RS to WS can be eliminated.
+  // On success, it updates WriteState by setting flag `WS.isEliminated`.
+  // If RS is a read from a zero register, and WS is eliminated, then
+  // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
+  // reserve a physical register for it.
+  bool tryEliminateMove(WriteState &WS, ReadState &RS);
+
+  // Checks if there are enough physical registers in the register files.
+  // Returns a "response mask" where each bit represents the response from a
+  // different register file.  A mask of all zeroes means that all register
+  // files are available.  Otherwise, the mask can be used to identify which
+  // register file was busy.  This semantic allows us to classify dispatch
+  // stalls caused by the lack of register file resources.
+  //
+  // Current implementation can simulate up to 32 register files (including the
+  // special register file at index #0).
+  unsigned isAvailable(ArrayRef<unsigned> Regs) const;
+
+  // Returns the number of PRFs implemented by this processor.
+  unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
+
+  // Notify each PRF that a new cycle just started.
+  void cycleStart();
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_REGISTER_FILE_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h
new file mode 100644
index 0000000..3addaaf
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -0,0 +1,425 @@
+//===--------------------- ResourceManager.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy.  These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RESOURCE_MANAGER_H
+#define LLVM_MCA_RESOURCE_MANAGER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+/// Used to notify the internal state of a processor resource.
+///
+/// A processor resource is available if it is not reserved, and there are
+/// available slots in the buffer.  A processor resource is unavailable if it
+/// is either reserved, or the associated buffer is full. A processor resource
+/// with a buffer size of -1 is always available if it is not reserved.
+///
+/// Values of type ResourceStateEvent are returned by method
+/// ResourceState::isBufferAvailable(), which is used to query the internal
+/// state of a resource.
+///
+/// The naming convention for resource state events is:
+///  * Event names start with prefix RS_
+///  * Prefix RS_ is followed by a string describing the actual resource state.
+enum ResourceStateEvent {
+  RS_BUFFER_AVAILABLE,
+  RS_BUFFER_UNAVAILABLE,
+  RS_RESERVED
+};
+
+/// Resource allocation strategy used by hardware scheduler resources.
+class ResourceStrategy {
+  ResourceStrategy(const ResourceStrategy &) = delete;
+  ResourceStrategy &operator=(const ResourceStrategy &) = delete;
+
+public:
+  ResourceStrategy() {}
+  virtual ~ResourceStrategy();
+
+  /// Selects a processor resource unit from a ReadyMask.
+  virtual uint64_t select(uint64_t ReadyMask) = 0;
+
+  /// Called by the ResourceManager when a processor resource group, or a
+  /// processor resource with multiple units has become unavailable.
+  ///
+  /// The default strategy uses this information to bias its selection logic.
+  virtual void used(uint64_t ResourceMask) {}
+};
+
+/// Default resource allocation strategy used by processor resource groups and
+/// processor resources with multiple units.
+class DefaultResourceStrategy final : public ResourceStrategy {
+  /// A Mask of resource unit identifiers.
+  ///
+  /// There is one bit set for every available resource unit.
+  /// It defaults to the value of field ResourceSizeMask in ResourceState.
+  const uint64_t ResourceUnitMask;
+
+  /// A simple round-robin selector for processor resource units.
+  /// Each bit of this mask identifies a sub resource within a group.
+  ///
+  /// As an example, lets assume that this is a default policy for a
+  /// processor resource group composed by the following three units:
+  ///   ResourceA -- 0b001
+  ///   ResourceB -- 0b010
+  ///   ResourceC -- 0b100
+  ///
+  /// Field NextInSequenceMask is used to select the next unit from the set of
+  /// resource units. It defaults to the value of field `ResourceUnitMasks` (in
+  /// this example, it defaults to mask '0b111').
+  ///
+  /// The round-robin selector would firstly select 'ResourceC', then
+  /// 'ResourceB', and eventually 'ResourceA'.  When a resource R is used, the
+  /// corresponding bit in NextInSequenceMask is cleared.  For example, if
+  /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
+  /// 0b011.
+  ///
+  /// When NextInSequenceMask becomes zero, it is automatically reset to the
+  /// default value (i.e. ResourceUnitMask).
+  uint64_t NextInSequenceMask;
+
+  /// This field is used to track resource units that are used (i.e. selected)
+  /// by other groups other than the one associated with this strategy object.
+  ///
+  /// In LLVM processor resource groups are allowed to partially (or fully)
+  /// overlap. That means, a same unit may be visible to multiple groups.
+  /// This field keeps track of uses that have originated from outside of
+  /// this group. The idea is to bias the selection strategy, so that resources
+  /// that haven't been used by other groups get prioritized.
+  ///
+  /// The end goal is to (try to) keep the resource distribution as uniform as
+  /// possible. By construction, this mask only tracks one-level of resource
+  /// usage. Therefore, this strategy is expected to be less accurate when same
+  /// units are used multiple times by other groups within a single round of
+  /// select.
+  ///
+  /// Note: an LRU selector would have a better accuracy at the cost of being
+  /// slightly more expensive (mostly in terms of runtime cost). Methods
+  /// 'select' and 'used', are always in the hot execution path of llvm-mca.
+  /// Therefore, a slow implementation of 'select' would have a negative impact
+  /// on the overall performance of the tool.
+  uint64_t RemovedFromNextInSequence;
+
+public:
+  DefaultResourceStrategy(uint64_t UnitMask)
+      : ResourceStrategy(), ResourceUnitMask(UnitMask),
+        NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {}
+  virtual ~DefaultResourceStrategy() = default;
+
+  uint64_t select(uint64_t ReadyMask) override;
+  void used(uint64_t Mask) override;
+};
+
+/// A processor resource descriptor.
+///
+/// There is an instance of this class for every processor resource defined by
+/// the machine scheduling model.
+/// Objects of class ResourceState dynamically track the usage of processor
+/// resource units.
+class ResourceState {
+  /// An index to the MCProcResourceDesc entry in the processor model.
+  const unsigned ProcResourceDescIndex;
+  /// A resource mask. This is generated by the tool with the help of
+  /// function `mca::computeProcResourceMasks' (see Support.h).
+  ///
+  /// Field ResourceMask only has one bit set if this resource state describes a
+  /// processor resource unit (i.e. this is not a group). That means, we can
+  /// quickly check if a resource is a group by simply counting the number of
+  /// bits that are set in the mask.
+  ///
+  /// The most significant bit of a mask (MSB) uniquely identifies a resource.
+  /// Remaining bits are used to describe the composition of a group (Group).
+  ///
+  /// Example (little endian):
+  ///            Resource |  Mask      |  MSB       |  Group
+  ///            ---------+------------+------------+------------
+  ///            A        |  0b000001  |  0b000001  |  0b000000
+  ///                     |            |            |
+  ///            B        |  0b000010  |  0b000010  |  0b000000
+  ///                     |            |            |
+  ///            C        |  0b010000  |  0b010000  |  0b000000
+  ///                     |            |            |
+  ///            D        |  0b110010  |  0b100000  |  0b010010
+  ///
+  /// In this example, resources A, B and C are processor resource units.
+  /// Only resource D is a group resource, and it contains resources B and C.
+  /// That is because MSB(B) and MSB(C) are both contained within Group(D).
+  const uint64_t ResourceMask;
+
+  /// A ProcResource can have multiple units.
+  ///
+  /// For processor resource groups this field is a mask of contained resource
+  /// units. It is obtained from ResourceMask by clearing the highest set bit.
+  /// The number of resource units in a group can be simply computed as the
+  /// population count of this field.
+  ///
+  /// For normal (i.e. non-group) resources, the number of bits set in this mask
+  /// is equivalent to the number of units declared by the processor model (see
+  /// field 'NumUnits' in 'ProcResourceUnits').
+  uint64_t ResourceSizeMask;
+
+  /// A mask of ready units.
+  uint64_t ReadyMask;
+
+  /// Buffered resources will have this field set to a positive number different
+  /// than zero. A buffered resource behaves like a reservation station
+  /// implementing its own buffer for out-of-order execution.
+  ///
+  /// A BufferSize of 1 is used by scheduler resources that force in-order
+  /// execution.
+  ///
+  /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
+  /// Since in-order issue/dispatch resources don't implement buffers, dispatch
+  /// events coincide with issue events.
+  /// Also, no other instruction can be dispatched/issued while this resource is
+  /// in use. Only when all the "resource cycles" are consumed (after the issue
+  /// event), a new instruction can be dispatched.
+  const int BufferSize;
+
+  /// Available slots in the buffer (zero, if this is not a buffered resource).
+  unsigned AvailableSlots;
+
+  /// This field is set if this resource is currently reserved.
+  ///
+  /// Resources can be reserved for a number of cycles.
+  /// Instructions can still be dispatched to reserved resources. However,
+  /// instructions dispatched to a reserved resource cannot be issued to the
+  /// underlying units (i.e. pipelines) until the resource is released.
+  bool Unavailable;
+
+  const bool IsAGroup;
+
+  /// Checks for the availability of unit 'SubResMask' in the group.
+  bool isSubResourceReady(uint64_t SubResMask) const {
+    return ReadyMask & SubResMask;
+  }
+
+public:
+  ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask);
+
+  unsigned getProcResourceID() const { return ProcResourceDescIndex; }
+  uint64_t getResourceMask() const { return ResourceMask; }
+  uint64_t getReadyMask() const { return ReadyMask; }
+  int getBufferSize() const { return BufferSize; }
+
+  bool isBuffered() const { return BufferSize > 0; }
+  bool isInOrder() const { return BufferSize == 1; }
+
+  /// Returns true if this is an in-order dispatch/issue resource.
+  bool isADispatchHazard() const { return BufferSize == 0; }
+  bool isReserved() const { return Unavailable; }
+
+  void setReserved() { Unavailable = true; }
+  void clearReserved() { Unavailable = false; }
+
+  /// Returns true if this resource is not reserved, and if there are at least
+  /// `NumUnits` available units.
+  bool isReady(unsigned NumUnits = 1) const;
+
+  bool isAResourceGroup() const { return IsAGroup; }
+
+  bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
+
+  void markSubResourceAsUsed(uint64_t ID) {
+    assert(isSubResourceReady(ID));
+    ReadyMask ^= ID;
+  }
+
+  void releaseSubResource(uint64_t ID) {
+    assert(!isSubResourceReady(ID));
+    ReadyMask ^= ID;
+  }
+
+  unsigned getNumUnits() const {
+    return isAResourceGroup() ? 1U : countPopulation(ResourceSizeMask);
+  }
+
+  /// Checks if there is an available slot in the resource buffer.
+  ///
+  /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if
+  /// there is a slot available.
+  ///
+  /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it
+  /// is reserved.
+  ///
+  /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
+  ResourceStateEvent isBufferAvailable() const;
+
+  /// Reserve a slot in the buffer.
+  void reserveBuffer() {
+    if (AvailableSlots)
+      AvailableSlots--;
+  }
+
+  /// Release a slot in the buffer.
+  void releaseBuffer() {
+    if (BufferSize > 0)
+      AvailableSlots++;
+    assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
+  }
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+/// A resource unit identifier.
+///
+/// This is used to identify a specific processor resource unit using a pair
+/// of indices where the 'first' index is a processor resource mask, and the
+/// 'second' index is an index for a "sub-resource" (i.e. unit).
+typedef std::pair<uint64_t, uint64_t> ResourceRef;
+
+// First: a MCProcResourceDesc index identifying a buffered resource.
+// Second: max number of buffer entries used in this resource.
+typedef std::pair<unsigned, unsigned> BufferUsageEntry;
+
+/// A resource manager for processor resource units and groups.
+///
+/// This class owns all the ResourceState objects, and it is responsible for
+/// acting on requests from a Scheduler by updating the internal state of
+/// ResourceState objects.
+/// This class doesn't know about instruction itineraries and functional units.
+/// In future, it can be extended to support itineraries too through the same
+/// public interface.
+class ResourceManager {
+  // Set of resources available on the subtarget.
+  //
+  // There is an instance of ResourceState for every resource declared by the
+  // target scheduling model.
+  //
+  // Elements of this vector are ordered by resource kind. In particular,
+  // resource units take precedence over resource groups.
+  //
+  // The index of a processor resource in this vector depends on the value of
+  // its mask (see the description of field ResourceState::ResourceMask).  In
+  // particular, it is computed as the position of the most significant bit set
+  // (MSB) in the mask plus one (since we want to ignore the invalid resource
+  // descriptor at index zero).
+  //
+  // Example (little endian):
+  //
+  //             Resource | Mask    |  MSB    | Index
+  //             ---------+---------+---------+-------
+  //                 A    | 0b00001 | 0b00001 |   1
+  //                      |         |         |
+  //                 B    | 0b00100 | 0b00100 |   3
+  //                      |         |         |
+  //                 C    | 0b10010 | 0b10000 |   5
+  //
+  //
+  // The same index is also used to address elements within vector `Strategies`
+  // and vector `Resource2Groups`.
+  std::vector<std::unique_ptr<ResourceState>> Resources;
+  std::vector<std::unique_ptr<ResourceStrategy>> Strategies;
+
+  // Used to quickly identify groups that own a particular resource unit.
+  std::vector<uint64_t> Resource2Groups;
+
+  // A table to map processor resource IDs to processor resource masks.
+  SmallVector<uint64_t, 8> ProcResID2Mask;
+
+  // Keeps track of which resources are busy, and how many cycles are left
+  // before those become usable again.
+  SmallDenseMap<ResourceRef, unsigned> BusyResources;
+
+  // Set of processor resource units available on the target.
+  uint64_t ProcResUnitMask;
+
+  // Set of processor resource units that are available during this cycle.
+  uint64_t AvailableProcResUnits;
+
+  // Set of processor resource groups that are currently reserved.
+  uint64_t ReservedResourceGroups;
+
+  // Returns the actual resource unit that will be used.
+  ResourceRef selectPipe(uint64_t ResourceID);
+
+  void use(const ResourceRef &RR);
+  void release(const ResourceRef &RR);
+
+  unsigned getNumUnits(uint64_t ResourceID) const;
+
+  // Overrides the selection strategy for the processor resource with the given
+  // mask.
+  void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+                             uint64_t ResourceMask);
+
+public:
+  ResourceManager(const MCSchedModel &SM);
+  virtual ~ResourceManager() = default;
+
+  // Overrides the selection strategy for the resource at index ResourceID in
+  // the MCProcResourceDesc table.
+  void setCustomStrategy(std::unique_ptr<ResourceStrategy> S,
+                         unsigned ResourceID) {
+    assert(ResourceID < ProcResID2Mask.size() &&
+           "Invalid resource index in input!");
+    return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]);
+  }
+
+  // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
+  // there are enough available slots in the buffers.
+  ResourceStateEvent canBeDispatched(ArrayRef<uint64_t> Buffers) const;
+
+  // Return the processor resource identifier associated to this Mask.
+  unsigned resolveResourceMask(uint64_t Mask) const;
+
+  // Consume a slot in every buffered resource from array 'Buffers'. Resource
+  // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
+  void reserveBuffers(ArrayRef<uint64_t> Buffers);
+
+  // Release buffer entries previously allocated by method reserveBuffers.
+  void releaseBuffers(ArrayRef<uint64_t> Buffers);
+
+  // Reserve a processor resource. A reserved resource is not available for
+  // instruction issue until it is released.
+  void reserveResource(uint64_t ResourceID);
+
+  // Release a previously reserved processor resource.
+  void releaseResource(uint64_t ResourceID);
+
+  // Returns a zero mask if resources requested by Desc are all available during
+  // this cycle. It returns a non-zero mask value only if there are unavailable
+  // processor resources; each bit set in the mask represents a busy processor
+  // resource unit or a reserved processor resource group.
+  uint64_t checkAvailability(const InstrDesc &Desc) const;
+
+  uint64_t getProcResUnitMask() const { return ProcResUnitMask; }
+  uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; }
+
+  void issueInstruction(
+      const InstrDesc &Desc,
+      SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+  void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed);
+
+#ifndef NDEBUG
+  void dump() const {
+    for (const std::unique_ptr<ResourceState> &Resource : Resources)
+      Resource->dump();
+  }
+#endif
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RESOURCE_MANAGER_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
new file mode 100644
index 0000000..0629014
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -0,0 +1,103 @@
+//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file simulates the hardware responsible for retiring instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#define LLVM_MCA_RETIRE_CONTROL_UNIT_H
+
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include <vector>
+
+namespace llvm {
+namespace mca {
+
+/// This class tracks which instructions are in-flight (i.e., dispatched but not
+/// retired) in the OoO backend.
+///
+/// This class checks on every cycle if/which instructions can be retired.
+/// Instructions are retired in program order.
+/// In the event of an instruction being retired, the pipeline that owns
+/// this RetireControlUnit (RCU) gets notified.
+///
+/// On instruction retired, register updates are all architecturally
+/// committed, and any physical registers previously allocated for the
+/// retired instruction are freed.
+struct RetireControlUnit : public HardwareUnit {
+  // A RUToken is created by the RCU for every instruction dispatched to the
+  // schedulers.  These "tokens" are managed by the RCU in its token Queue.
+  //
+  // On every cycle ('cycleEvent'), the RCU iterates through the token queue
+  // looking for any token with its 'Executed' flag set.  If a token has that
+  // flag set, then the instruction has reached the write-back stage and will
+  // be retired by the RCU.
+  //
+  // 'NumSlots' represents the number of entries consumed by the instruction in
+  // the reorder buffer. Those entries will become available again once the
+  // instruction is retired.
+  //
+  // Note that the size of the reorder buffer is defined by the scheduling
+  // model via field 'NumMicroOpBufferSize'.
+  struct RUToken {
+    InstRef IR;
+    unsigned NumSlots; // Slots reserved to this instruction.
+    bool Executed;     // True if the instruction is past the WB stage.
+  };
+
+private:
+  unsigned NextAvailableSlotIdx;
+  unsigned CurrentInstructionSlotIdx;
+  unsigned AvailableSlots;
+  unsigned MaxRetirePerCycle; // 0 means no limit.
+  std::vector<RUToken> Queue;
+
+public:
+  RetireControlUnit(const MCSchedModel &SM);
+
+  bool isEmpty() const { return AvailableSlots == Queue.size(); }
+  bool isAvailable(unsigned Quantity = 1) const {
+    // Some instructions may declare a number of uOps which exceeds the size
+    // of the reorder buffer. To avoid problems, cap the amount of slots to
+    // the size of the reorder buffer.
+    Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+
+    // Further normalize the number of micro opcodes for instructions that
+    // declare zero opcodes. This should match the behavior of method
+    // reserveSlot().
+    Quantity = std::max(Quantity, 1U);
+    return AvailableSlots >= Quantity;
+  }
+
+  unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; }
+
+  // Reserves a number of slots, and returns a new token.
+  unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps);
+
+  // Return the current token from the RCU's circular token queue.
+  const RUToken &peekCurrentToken() const;
+
+  // Advance the pointer to the next token in the circular token queue.
+  void consumeCurrentToken();
+
+  // Update the RCU token to represent the executed state.
+  void onInstructionExecuted(unsigned TokenID);
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RETIRE_CONTROL_UNIT_H
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h
new file mode 100644
index 0000000..f1cfcbe
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -0,0 +1,250 @@
+//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A scheduler for Processor Resource Units and Processor Resource Groups.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SCHEDULER_H
+#define LLVM_MCA_SCHEDULER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+class SchedulerStrategy {
+public:
+  SchedulerStrategy() = default;
+  virtual ~SchedulerStrategy();
+
+  /// Returns true if Lhs should take priority over Rhs.
+  ///
+  /// This method is used by class Scheduler to select the "best" ready
+  /// instruction to issue to the underlying pipelines.
+  virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0;
+};
+
+/// Default instruction selection strategy used by class Scheduler.
+class DefaultSchedulerStrategy : public SchedulerStrategy {
+  /// This method ranks instructions based on their age, and the number of known
+  /// users. The lower the rank value, the better.
+  int computeRank(const InstRef &Lhs) const {
+    return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers();
+  }
+
+public:
+  DefaultSchedulerStrategy() = default;
+  virtual ~DefaultSchedulerStrategy();
+
+  bool compare(const InstRef &Lhs, const InstRef &Rhs) const override {
+    int LhsRank = computeRank(Lhs);
+    int RhsRank = computeRank(Rhs);
+
+    /// Prioritize older instructions over younger instructions to minimize the
+    /// pressure on the reorder buffer.
+    if (LhsRank == RhsRank)
+      return Lhs.getSourceIndex() < Rhs.getSourceIndex();
+    return LhsRank < RhsRank;
+  }
+};
+
+/// Class Scheduler is responsible for issuing instructions to pipeline
+/// resources.
+///
+/// Internally, it delegates to a ResourceManager the management of processor
+/// resources. This class is also responsible for tracking the progress of
+/// instructions from the dispatch stage, until the write-back stage.
+///
+class Scheduler : public HardwareUnit {
+  LSUnit &LSU;
+
+  // Instruction selection strategy for this Scheduler.
+  std::unique_ptr<SchedulerStrategy> Strategy;
+
+  // Hardware resources that are managed by this scheduler.
+  std::unique_ptr<ResourceManager> Resources;
+
+  // Instructions dispatched to the Scheduler are internally classified based on
+  // the instruction stage (see Instruction::InstrStage).
+  //
+  // An Instruction dispatched to the Scheduler is added to the WaitSet if not
+  // all its register operands are available, and at least one latency is unknown.
+  // By construction, the WaitSet only contains instructions that are in the
+  // IS_DISPATCHED stage.
+  //
+  // An Instruction transitions from the WaitSet to the PendingSet if the
+  // instruction is not ready yet, but the latency of every register read is known.
+  // Instructions in the PendingSet are expected to be in the IS_PENDING stage.
+  //
+  // Instructions in the PendingSet are immediately dominated only by
+  // instructions that have already been issued to the underlying pipelines.
+  // In the presence of bottlenecks caused by data dependencies, the PendingSet
+  // can be inspected to identify problematic data dependencies between
+  // instructions.
+  //
+  // An instruction is moved to the ReadySet when all register operands become
+  // available, and all memory dependencies are met.  Instructions that are
+  // moved from the PendingSet to the ReadySet transition in state from
+  // 'IS_PENDING' to 'IS_READY'.
+  //
+  // On every cycle, the Scheduler checks if it can promote instructions from the
+  // PendingSet to the ReadySet.
+  //
+  // An Instruction is moved from the ReadySet to the `IssuedSet` when it starts
+  // execution. This event also causes an instruction state transition (i.e. from
+  // state IS_READY, to state IS_EXECUTING). An Instruction leaves the IssuedSet
+  // only when it reaches the write-back stage.
+  std::vector<InstRef> WaitSet;
+  std::vector<InstRef> PendingSet;
+  std::vector<InstRef> ReadySet;
+  std::vector<InstRef> IssuedSet;
+
+  // A mask of busy resource units. It defaults to the empty set (i.e. a zero
+  // mask), and it is cleared at the beginning of every cycle.
+  // It is updated every time the scheduler fails to issue an instruction from
+  // the ready set due to unavailable pipeline resources.
+  // Each bit of the mask represents an unavailable resource.
+  uint64_t BusyResourceUnits;
+
+  /// Verify the given selection strategy and set the Strategy member
+  /// accordingly.  If no strategy is provided, the DefaultSchedulerStrategy is
+  /// used.
+  void initializeStrategy(std::unique_ptr<SchedulerStrategy> S);
+
+  /// Issue an instruction without updating the ready queue.
+  void issueInstructionImpl(
+      InstRef &IR,
+      SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+  // Identify instructions that have finished executing, and remove them from
+  // the IssuedSet. References to executed instructions are added to input
+  // vector 'Executed'.
+  void updateIssuedSet(SmallVectorImpl<InstRef> &Executed);
+
+  // Try to promote instructions from the PendingSet to the ReadySet.
+  // Add promoted instructions to the 'Ready' vector in input.
+  // Returns true if at least one instruction was promoted.
+  bool promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+
+  // Try to promote instructions from the WaitSet to the PendingSet.
+  // Returns true if at least one instruction was promoted.
+  bool promoteToPendingSet();
+
+public:
+  Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
+      : Scheduler(Model, Lsu, nullptr) {}
+
+  Scheduler(const MCSchedModel &Model, LSUnit &Lsu,
+            std::unique_ptr<SchedulerStrategy> SelectStrategy)
+      : Scheduler(make_unique<ResourceManager>(Model), Lsu,
+                  std::move(SelectStrategy)) {}
+
+  Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
+            std::unique_ptr<SchedulerStrategy> SelectStrategy)
+      : LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0) {
+    initializeStrategy(std::move(SelectStrategy));
+  }
+
+  // Stalls generated by the scheduler.
+  enum Status {
+    SC_AVAILABLE,
+    SC_LOAD_QUEUE_FULL,
+    SC_STORE_QUEUE_FULL,
+    SC_BUFFERS_FULL,
+    SC_DISPATCH_GROUP_STALL,
+  };
+
+  /// Check if the instruction in 'IR' can be dispatched and returns an answer
+  /// in the form of a Status value.
+  ///
+  /// The DispatchStage is responsible for querying the Scheduler before
+  /// dispatching new instructions. This routine is used for performing such
+  /// a query.  If the instruction 'IR' can be dispatched, then true is
+  /// returned, otherwise false is returned with Event set to the stall type.
+  /// Internally, it also checks if the load/store unit is available.
+  Status isAvailable(const InstRef &IR) const;
+
+  /// Reserves buffer and LSUnit queue resources that are necessary to issue
+  /// this instruction.
+  ///
+  /// Returns true if instruction IR is ready to be issued to the underlying
+  /// pipelines. Note that this operation cannot fail; it assumes that a
+  /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
+  void dispatch(const InstRef &IR);
+
+  /// Returns true if IR is ready to be executed by the underlying pipelines.
+  /// This method assumes that IR has been previously dispatched.
+  bool isReady(const InstRef &IR) const;
+
+  /// Issue an instruction and populates a vector of used pipeline resources,
+  /// and a vector of instructions that transitioned to the ready state as a
+  /// result of this event.
+  void issueInstruction(
+      InstRef &IR,
+      SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
+      SmallVectorImpl<InstRef> &Ready);
+
+  /// Returns true if IR has to be issued immediately, or if IR is a zero
+  /// latency instruction.
+  bool mustIssueImmediately(const InstRef &IR) const;
+
+  /// This routine notifies the Scheduler that a new cycle just started.
+  ///
+  /// It notifies the underlying ResourceManager that a new cycle just started.
+  /// Vector `Freed` is populated with resourceRef related to resources that
+  /// have changed in state, and that are now available to new instructions.
+  /// Instructions executed are added to vector Executed, while vector Ready is
+  /// populated with instructions that have become ready in this new cycle.
+  void cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+                  SmallVectorImpl<InstRef> &Ready,
+                  SmallVectorImpl<InstRef> &Executed);
+
+  /// Convert a resource mask into a valid llvm processor resource identifier.
+  unsigned getResourceID(uint64_t Mask) const {
+    return Resources->resolveResourceMask(Mask);
+  }
+
+  /// Select the next instruction to issue from the ReadySet. Returns an invalid
+  /// instruction reference if there are no ready instructions, or if processor
+  /// resources are not available.
+  InstRef select();
+
+  /// Returns a mask of busy resources. Each bit of the mask identifies a unique
+  /// processor resource unit. In the absence of bottlenecks caused by resource
+  /// pressure, the mask value returned by this method is always zero.
+  uint64_t getBusyResourceUnits() const { return BusyResourceUnits; }
+  bool arePipelinesFullyUsed() const {
+    return !Resources->getAvailableProcResUnits();
+  }
+  bool isReadySetEmpty() const { return ReadySet.empty(); }
+  bool isWaitSetEmpty() const { return WaitSet.empty(); }
+
+#ifndef NDEBUG
+  // Dumps the internal state of this scheduler (for debugging only).
+  void dump() const;
+
+  // This routine performs a sanity check.  This routine should only be called
+  // when we know that 'IR' is not in the scheduler's instruction queues.
+  void sanityCheck(const InstRef &IR) const {
+    assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!");
+    assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!");
+    assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!");
+  }
+#endif // !NDEBUG
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SCHEDULER_H
diff --git a/linux-x64/clang/include/llvm/MCA/InstrBuilder.h b/linux-x64/clang/include/llvm/MCA/InstrBuilder.h
new file mode 100644
index 0000000..6900163
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/InstrBuilder.h
@@ -0,0 +1,76 @@
+//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A builder class for instructions that are statically analyzed by llvm-mca.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRBUILDER_H
+#define LLVM_MCA_INSTRBUILDER_H
+
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+/// A builder class that knows how to construct Instruction objects.
+///
+/// Every llvm-mca Instruction is described by an object of class InstrDesc.
+/// An InstrDesc describes which registers are read/written by the instruction,
+/// as well as the instruction latency and hardware resources consumed.
+///
+/// This class is used by the tool to construct Instructions and instruction
+/// descriptors (i.e. InstrDesc objects).
+/// Information from the machine scheduling model is used to identify processor
+/// resources that are consumed by an instruction.
+class InstrBuilder {
+  const MCSubtargetInfo &STI;
+  const MCInstrInfo &MCII;
+  const MCRegisterInfo &MRI;
+  // NOTE(review): stored as a pointer (unlike the references above) —
+  // presumably optional; confirm null-safety in the implementation.
+  const MCInstrAnalysis *MCIA;
+  // One resource mask per processor resource declared by the scheduling model.
+  SmallVector<uint64_t, 8> ProcResourceMasks;
+
+  // Cache of instruction descriptors, keyed by opcode.
+  DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
+  // Per-MCInst descriptors keyed by instruction pointer — presumably for
+  // variant scheduling classes whose description depends on the actual
+  // operand values; confirm against the implementation.
+  DenseMap<const MCInst *, std::unique_ptr<const InstrDesc>> VariantDescriptors;
+
+  // Reset to true by clear(); presumably used to act only on the first
+  // call/return instruction seen in a region — confirm in the implementation.
+  bool FirstCallInst;
+  bool FirstReturnInst;
+
+  Expected<const InstrDesc &> createInstrDescImpl(const MCInst &MCI);
+  // Returns the cached descriptor for MCI, creating it on first use.
+  Expected<const InstrDesc &> getOrCreateInstrDesc(const MCInst &MCI);
+
+  InstrBuilder(const InstrBuilder &) = delete;
+  InstrBuilder &operator=(const InstrBuilder &) = delete;
+
+  void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+  void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+  Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const;
+
+public:
+  InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
+               const MCRegisterInfo &RI, const MCInstrAnalysis *IA);
+
+  // Resets per-region state. Note that only VariantDescriptors is cleared
+  // here; the opcode-keyed Descriptors cache persists across calls.
+  void clear() {
+    VariantDescriptors.shrink_and_clear();
+    FirstCallInst = true;
+    FirstReturnInst = true;
+  }
+
+  Expected<std::unique_ptr<Instruction>> createInstruction(const MCInst &MCI);
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRBUILDER_H
diff --git a/linux-x64/clang/include/llvm/MCA/Instruction.h b/linux-x64/clang/include/llvm/MCA/Instruction.h
new file mode 100644
index 0000000..658b7fe
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Instruction.h
@@ -0,0 +1,570 @@
+//===--------------------- Instruction.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines abstractions used by the Pipeline to model register reads,
+/// register writes and instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRUCTION_H
+#define LLVM_MCA_INSTRUCTION_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+
+#ifndef NDEBUG
+#include "llvm/Support/raw_ostream.h"
+#endif
+
+#include <memory>
+
+namespace llvm {
+
+namespace mca {
+
+constexpr int UNKNOWN_CYCLES = -512;
+
+/// A register write descriptor.
+struct WriteDescriptor {
+  // Operand index. The index is negative for implicit writes only.
+  // For implicit writes, the actual operand index is computed performing
+  // a bitwise not of the OpIndex.
+  int OpIndex;
+  // Write latency. Number of cycles before write-back stage.
+  unsigned Latency;
+  // This field is set to a value different than zero only if this
+  // is an implicit definition.
+  unsigned RegisterID;
+  // Instruction itineraries would set this field to the SchedClass ID.
+  // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry
+  // element associated to this write.
+  // When computing read latencies, this value is matched against the
+  // "ReadAdvance" information. The hardware backend may implement
+  // dedicated forwarding paths to quickly propagate write results to dependent
+  // instructions waiting in the reservation station (effectively bypassing the
+  // write-back stage).
+  unsigned SClassOrWriteResourceID;
+  // True only if this is a write obtained from an optional definition.
+  // Optional definitions are allowed to reference regID zero (i.e. "no
+  // register").
+  bool IsOptionalDef;
+
+  // True if this write describes an implicit register definition.
+  bool isImplicitWrite() const { return OpIndex < 0; };
+};
+
+/// A register read descriptor.
+struct ReadDescriptor {
+  // A MCOperand index. This is used by the Dispatch logic to identify register
+  // reads. Implicit reads have negative indices. The actual operand index of an
+  // implicit read is the bitwise not of field OpIndex.
+  int OpIndex;
+  // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit
+  // uses always come first in the sequence of uses.
+  unsigned UseIndex;
+  // This field is only set if this is an implicit read.
+  unsigned RegisterID;
+  // Scheduling Class Index. It is used to query the scheduling model for the
+  // MCSchedClassDesc object.
+  unsigned SchedClassID;
+
+  // True if this read describes an implicit register use.
+  bool isImplicitRead() const { return OpIndex < 0; };
+};
+
+class ReadState;
+
+/// Tracks uses of a register definition (e.g. register write).
+///
+/// Each implicit/explicit register write is associated with an instance of
+/// this class. A WriteState object tracks the dependent users of a
+/// register write. It also tracks how many cycles are left before the write
+/// back stage.
+class WriteState {
+  const WriteDescriptor *WD;
+  // On instruction issue, this field is set equal to the write latency.
+  // Before instruction issue, this field defaults to -512, a special
+  // value that represents an "unknown" number of cycles.
+  int CyclesLeft;
+
+  // Actual register defined by this write. This field is only used
+  // to speedup queries on the register file.
+  // For implicit writes, this field always matches the value of
+  // field RegisterID from WD.
+  unsigned RegisterID;
+
+  // Physical register file that serves register RegisterID.
+  unsigned PRFID;
+
+  // True if this write implicitly clears the upper portion of RegisterID's
+  // super-registers.
+  bool ClearsSuperRegs;
+
+  // True if this write is from a dependency breaking zero-idiom instruction.
+  bool WritesZero;
+
+  // True if this write has been eliminated at register renaming stage.
+  // Example: a register move doesn't consume scheduler/pipeline resources if
+  // it is eliminated at register renaming stage. It still consumes
+  // decode bandwidth, and ROB entries.
+  bool IsEliminated;
+
+  // This field is set if this is a partial register write, and it has a false
+  // dependency on any previous write of the same register (or a portion of it).
+  // DependentWrite must be able to complete before this write completes, so
+  // that we don't break the WAW, and the two writes can be merged together.
+  const WriteState *DependentWrite;
+
+  // A partial write that is in a false dependency with this write.
+  WriteState *PartialWrite;
+
+  // Number of cycles left before the dependent write completes; propagated
+  // to this write via writeStartEvent().
+  unsigned DependentWriteCyclesLeft;
+
+  // A list of dependent reads. Users is a set of dependent
+  // reads. A dependent read is added to the set only if CyclesLeft
+  // is "unknown". As soon as CyclesLeft is 'known', each user in the set
+  // gets notified with the actual CyclesLeft.
+
+  // The 'second' element of a pair is a "ReadAdvance" number of cycles.
+  SmallVector<std::pair<ReadState *, int>, 4> Users;
+
+public:
+  WriteState(const WriteDescriptor &Desc, unsigned RegID,
+             bool clearsSuperRegs = false, bool writesZero = false)
+      : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
+        ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
+        IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
+        DependentWriteCyclesLeft(0) {}
+
+  WriteState(const WriteState &Other) = default;
+  WriteState &operator=(const WriteState &Other) = default;
+
+  int getCyclesLeft() const { return CyclesLeft; }
+  unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
+  unsigned getRegisterID() const { return RegisterID; }
+  unsigned getRegisterFileID() const { return PRFID; }
+  unsigned getLatency() const { return WD->Latency; }
+
+  // Registers a dependent read of this definition. ReadAdvance is the number
+  // of cycles by which the perceived latency is reduced (see the comment on
+  // WriteDescriptor::SClassOrWriteResourceID).
+  void addUser(ReadState *Use, int ReadAdvance);
+  // Registers a dependent partial write (false dependency) of this definition.
+  void addUser(WriteState *Use);
+
+  unsigned getDependentWriteCyclesLeft() const {
+    return DependentWriteCyclesLeft;
+  }
+
+  // Number of users: dependent reads, plus the partial write (if any).
+  unsigned getNumUsers() const {
+    unsigned NumUsers = Users.size();
+    if (PartialWrite)
+      ++NumUsers;
+    return NumUsers;
+  }
+
+  bool clearsSuperRegisters() const { return ClearsSuperRegs; }
+  bool isWriteZero() const { return WritesZero; }
+  bool isEliminated() const { return IsEliminated; }
+
+  // A write is ready when it no longer waits on a dependent write, and the
+  // cycles left on that former dependency are fewer than this write's latency.
+  // NOTE(review): the local below intentionally shadows the 'CyclesLeft' data
+  // member; it holds DependentWriteCyclesLeft, not the member's value.
+  bool isReady() const {
+    if (getDependentWrite())
+      return false;
+    unsigned CyclesLeft = getDependentWriteCyclesLeft();
+    return !CyclesLeft || CyclesLeft < getLatency();
+  }
+
+  bool isExecuted() const {
+    return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
+  }
+
+  const WriteState *getDependentWrite() const { return DependentWrite; }
+  void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
+  // Called when the dependent write starts executing: records its remaining
+  // cycles and clears the dependency.
+  void writeStartEvent(unsigned Cycles) {
+    DependentWriteCyclesLeft = Cycles;
+    DependentWrite = nullptr;
+  }
+
+  void setWriteZero() { WritesZero = true; }
+  void setEliminated() {
+    assert(Users.empty() && "Write is in an inconsistent state.");
+    CyclesLeft = 0;
+    IsEliminated = true;
+  }
+
+  void setPRF(unsigned PRF) { PRFID = PRF; }
+
+  // On every cycle, update CyclesLeft and notify dependent users.
+  void cycleEvent();
+  void onInstructionIssued();
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+/// Tracks register operand latency in cycles.
+///
+/// A read may be dependent on more than one write. This occurs when some
+/// writes only partially update the register associated to this read.
+class ReadState {
+  const ReadDescriptor *RD;
+  // Physical register identifier associated with this read.
+  unsigned RegisterID;
+  // Physical register file that serves register RegisterID.
+  unsigned PRFID;
+  // Number of writes that contribute to the definition of RegisterID.
+  // In the absence of partial register updates, the number of DependentWrites
+  // cannot be more than one.
+  unsigned DependentWrites;
+  // Number of cycles left before RegisterID can be read. This value depends on
+  // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES.
+  // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of
+  // every dependent write is known.
+  int CyclesLeft;
+  // This field is updated on every writeStartEvent(). When the number of
+  // dependent writes (i.e. field DependentWrite) is zero, this value is
+  // propagated to field CyclesLeft.
+  unsigned TotalCycles;
+  // This field is set to true only if there are no dependent writes, and
+  // there are no `CyclesLeft' to wait.
+  bool IsReady;
+  // True if this is a read from a known zero register.
+  bool IsZero;
+  // True if this register read is from a dependency-breaking instruction.
+  bool IndependentFromDef;
+
+public:
+  ReadState(const ReadDescriptor &Desc, unsigned RegID)
+      : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
+        CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
+        IsZero(false), IndependentFromDef(false) {}
+
+  const ReadDescriptor &getDescriptor() const { return *RD; }
+  unsigned getSchedClass() const { return RD->SchedClassID; }
+  unsigned getRegisterID() const { return RegisterID; }
+  unsigned getRegisterFileID() const { return PRFID; }
+
+  // True while a known, positive number of cycles is left before the register
+  // can be read, and the read is not dependency-breaking.
+  bool isPending() const { return !IndependentFromDef && CyclesLeft > 0; }
+  bool isReady() const { return IsReady; }
+  bool isImplicitRead() const { return RD->isImplicitRead(); }
+
+  bool isIndependentFromDef() const { return IndependentFromDef; }
+  void setIndependentFromDef() { IndependentFromDef = true; }
+
+  void cycleEvent();
+  void writeStartEvent(unsigned Cycles);
+  // Declares how many writes this read depends on; a read with zero dependent
+  // writes is immediately ready.
+  void setDependentWrites(unsigned Writes) {
+    DependentWrites = Writes;
+    IsReady = !Writes;
+  }
+
+  bool isReadZero() const { return IsZero; }
+  void setReadZero() { IsZero = true; }
+  void setPRF(unsigned ID) { PRFID = ID; }
+};
+
+/// A sequence of cycles.
+///
+/// This class can be used as a building block to construct ranges of cycles.
+class CycleSegment {
+  unsigned Begin; // Inclusive.
+  unsigned End;   // Exclusive.
+  bool Reserved;  // Resources associated to this segment must be reserved.
+
+public:
+  CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false)
+      : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {}
+
+  bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; }
+  // NOTE(review): startsAfter(CS) is true when CS begins at/after this
+  // segment's end — i.e. this segment lies entirely before CS.
+  bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; }
+  // endsBefore(CS) is true when this segment lies entirely after CS.
+  bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; }
+  bool overlaps(const CycleSegment &CS) const {
+    return !startsAfter(CS) && !endsBefore(CS);
+  }
+  bool isExecuting() const { return Begin == 0 && End != 0; }
+  bool isExecuted() const { return End == 0; }
+  bool operator<(const CycleSegment &Other) const {
+    return Begin < Other.Begin;
+  }
+  // Advances the segment by one cycle; both endpoints saturate at zero.
+  CycleSegment &operator--(void) {
+    if (Begin)
+      Begin--;
+    if (End)
+      End--;
+    return *this;
+  }
+
+  bool isValid() const { return Begin <= End; }
+  unsigned size() const { return End - Begin; };
+  // Shrinks the segment from the end; Cycles must not exceed End.
+  void subtract(unsigned Cycles) {
+    assert(End >= Cycles);
+    End -= Cycles;
+  }
+
+  unsigned begin() const { return Begin; }
+  unsigned end() const { return End; }
+  void setEnd(unsigned NewEnd) { End = NewEnd; }
+  bool isReserved() const { return Reserved; }
+  void setReserved() { Reserved = true; }
+};
+
+/// Helper used by class InstrDesc to describe how hardware resources
+/// are used.
+///
+/// This class describes how many resource units of a specific resource kind
+/// (and how many cycles) are "used" by an instruction.
+struct ResourceUsage {
+  CycleSegment CS;   // Range of cycles during which the resource is consumed.
+  unsigned NumUnits; // Number of resource units consumed.
+  ResourceUsage(CycleSegment Cycles, unsigned Units = 1)
+      : CS(Cycles), NumUnits(Units) {}
+  unsigned size() const { return CS.size(); }
+  bool isReserved() const { return CS.isReserved(); }
+  void setReserved() { CS.setReserved(); }
+};
+
+/// An instruction descriptor
+struct InstrDesc {
+  SmallVector<WriteDescriptor, 4> Writes; // Implicit writes are at the end.
+  SmallVector<ReadDescriptor, 4> Reads;   // Implicit reads are at the end.
+
+  // For every resource used by an instruction of this kind, this vector
+  // reports the number of "consumed cycles".
+  SmallVector<std::pair<uint64_t, ResourceUsage>, 4> Resources;
+
+  // A list of buffered resources consumed by this instruction.
+  SmallVector<uint64_t, 4> Buffers;
+
+  // NOTE(review): whether these two fields hold unit counts or bitmasks is
+  // not evident from this header; confirm against the InstrBuilder logic
+  // that populates them.
+  unsigned UsedProcResUnits;
+  unsigned UsedProcResGroups;
+
+  // Maximum latency (in cycles) of this instruction.
+  unsigned MaxLatency;
+  // Number of MicroOps for this instruction.
+  unsigned NumMicroOps;
+  // SchedClassID used to construct this InstrDesc.
+  // This information is currently used by views to do fast queries on the
+  // subtarget when computing the reciprocal throughput.
+  unsigned SchedClassID;
+
+  // NOTE(review): these flags appear to mirror the MCInstrDesc /
+  // MCSchedClassDesc properties of the same names — confirm in InstrBuilder.
+  bool MayLoad;
+  bool MayStore;
+  bool HasSideEffects;
+  bool BeginGroup;
+  bool EndGroup;
+
+  // True if all buffered resources are in-order, and there is at least one
+  // buffer which is a dispatch hazard (BufferSize = 0).
+  bool MustIssueImmediately;
+
+  // A zero latency instruction doesn't consume any scheduler resources.
+  bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
+
+  // Non-copyable: descriptors are shared by reference.
+  InstrDesc() = default;
+  InstrDesc(const InstrDesc &Other) = delete;
+  InstrDesc &operator=(const InstrDesc &Other) = delete;
+};
+
+/// Base class for instructions consumed by the simulation pipeline.
+///
+/// This class tracks data dependencies as well as generic properties
+/// of the instruction.
+class InstructionBase {
+  const InstrDesc &Desc;
+
+  // This field is set for instructions that are candidates for move
+  // elimination. For more information about move elimination, see the
+  // definition of RegisterMappingTracker in RegisterFile.h
+  bool IsOptimizableMove;
+
+  // Output dependencies.
+  // One entry per each implicit and explicit register definition.
+  SmallVector<WriteState, 4> Defs;
+
+  // Input dependencies.
+  // One entry per each implicit and explicit register use.
+  SmallVector<ReadState, 4> Uses;
+
+public:
+  InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
+
+  SmallVectorImpl<WriteState> &getDefs() { return Defs; }
+  // NOTE(review): returns ArrayRef by value; the 'const' qualifier on the
+  // return type is redundant.
+  const ArrayRef<WriteState> getDefs() const { return Defs; }
+  SmallVectorImpl<ReadState> &getUses() { return Uses; }
+  const ArrayRef<ReadState> getUses() const { return Uses; }
+  const InstrDesc &getDesc() const { return Desc; }
+
+  unsigned getLatency() const { return Desc.MaxLatency; }
+
+  // True if at least one register definition has a dependent user.
+  bool hasDependentUsers() const {
+    return any_of(Defs,
+                  [](const WriteState &Def) { return Def.getNumUsers() > 0; });
+  }
+
+  // Total number of dependent users across all register definitions.
+  unsigned getNumUsers() const {
+    unsigned NumUsers = 0;
+    for (const WriteState &Def : Defs)
+      NumUsers += Def.getNumUsers();
+    return NumUsers;
+  }
+
+  // Returns true if this instruction is a candidate for move elimination.
+  bool isOptimizableMove() const { return IsOptimizableMove; }
+  void setOptimizableMove() { IsOptimizableMove = true; }
+};
+
+/// An instruction propagated through the simulated instruction pipeline.
+///
+/// This class is used to monitor changes to the internal state of instructions
+/// that are sent to the various components of the simulated hardware pipeline.
+class Instruction : public InstructionBase {
+  enum InstrStage {
+    IS_INVALID,    // Instruction in an invalid state.
+    IS_DISPATCHED, // Instruction dispatched but operands are not ready.
+    IS_PENDING,    // Instruction is not ready, but operand latency is known.
+    IS_READY,      // Instruction dispatched and operands ready.
+    IS_EXECUTING,  // Instruction issued.
+    IS_EXECUTED,   // Instruction executed. Values are written back.
+    IS_RETIRED     // Instruction retired.
+  };
+
+  // The current instruction stage.
+  enum InstrStage Stage;
+
+  // This value defaults to the instruction latency. This instruction is
+  // considered executed when field CyclesLeft goes to zero.
+  int CyclesLeft;
+
+  // Retire Unit token ID for this instruction.
+  unsigned RCUTokenID;
+
+public:
+  Instruction(const InstrDesc &D)
+      : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
+        RCUTokenID(0) {}
+
+  unsigned getRCUTokenID() const { return RCUTokenID; }
+  int getCyclesLeft() const { return CyclesLeft; }
+
+  // Transition to the dispatch stage, and assign a RCUToken to this
+  // instruction. The RCUToken is used to track the completion of every
+  // register write performed by this instruction.
+  void dispatch(unsigned RCUTokenID);
+
+  // Instruction issued. Transition to the IS_EXECUTING state, and update
+  // all the definitions.
+  void execute();
+
+  // Force a transition from the IS_DISPATCHED state to the IS_READY or
+  // IS_PENDING state. State transitions normally occur either at the beginning
+  // of a new cycle (see method cycleEvent()), or as a result of another issue
+  // event. This method is called every time the instruction might have changed
+  // in state. It internally delegates to method updateDispatched() and
+  // updatePending().
+  void update();
+  bool updateDispatched();
+  bool updatePending();
+
+  bool isDispatched() const { return Stage == IS_DISPATCHED; }
+  bool isPending() const { return Stage == IS_PENDING; }
+  bool isReady() const { return Stage == IS_READY; }
+  bool isExecuting() const { return Stage == IS_EXECUTING; }
+  bool isExecuted() const { return Stage == IS_EXECUTED; }
+  bool isRetired() const { return Stage == IS_RETIRED; }
+
+  // True if the instruction is ready and every register write was eliminated
+  // at register renaming (see WriteState::isEliminated()).
+  bool isEliminated() const {
+    return isReady() && getDefs().size() &&
+           all_of(getDefs(),
+                  [](const WriteState &W) { return W.isEliminated(); });
+  }
+
+  // Forces a transition from state IS_DISPATCHED to state IS_EXECUTED.
+  void forceExecuted();
+
+  void retire() {
+    assert(isExecuted() && "Instruction is in an invalid state!");
+    Stage = IS_RETIRED;
+  }
+
+  // Notifies this instruction that a new simulated cycle has started.
+  void cycleEvent();
+};
+
+/// An InstRef contains both a SourceMgr index and Instruction pair.  The index
+/// is used as a unique identifier for the instruction.  MCA will make use of
+/// this index as a key throughout MCA.
+class InstRef {
+  std::pair<unsigned, Instruction *> Data;
+
+public:
+  InstRef() : Data(std::make_pair(0, nullptr)) {}
+  InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
+
+  bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+
+  /// Returns the instruction's position in the source sequence.
+  unsigned getSourceIndex() const { return Data.first; }
+  Instruction *getInstruction() { return Data.second; }
+  const Instruction *getInstruction() const { return Data.second; }
+
+  /// Returns true if this references a valid instruction.
+  operator bool() const { return Data.second != nullptr; }
+
+  /// Invalidate this reference.
+  void invalidate() { Data.second = nullptr; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const { OS << getSourceIndex(); }
+#endif
+};
+
+#ifndef NDEBUG
+// Debug-only stream operator; prints the source index of the referenced
+// instruction.
+inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) {
+  IR.print(OS);
+  return OS;
+}
+#endif
+
+/// A reference to a register write.
+///
+/// This class is mainly used by the register file to describe register
+/// mappings. It correlates a register write to the source index of the
+/// defining instruction.
+class WriteRef {
+  std::pair<unsigned, WriteState *> Data;
+  // Sentinel source index used by default-constructed (invalid) references.
+  // The actual value is defined out-of-line.
+  static const unsigned INVALID_IID;
+
+public:
+  WriteRef() : Data(INVALID_IID, nullptr) {}
+  WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {}
+
+  unsigned getSourceIndex() const { return Data.first; }
+  const WriteState *getWriteState() const { return Data.second; }
+  WriteState *getWriteState() { return Data.second; }
+  void invalidate() { Data.second = nullptr; }
+  bool isWriteZero() const {
+    assert(isValid() && "Invalid null WriteState found!");
+    return getWriteState()->isWriteZero();
+  }
+
+  /// Returns true if this register write has been executed, and the new
+  /// register value is therefore available to users.
+  /// NOTE(review): a null WriteState with a valid source index is treated as
+  /// available — presumably the write has already retired; confirm against
+  /// the RegisterFile implementation.
+  bool isAvailable() const {
+    if (getSourceIndex() == INVALID_IID)
+      return false;
+    const WriteState *WS = getWriteState();
+    return !WS || WS->isExecuted();
+  }
+
+  bool isValid() const { return Data.first != INVALID_IID && Data.second; }
+  bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRUCTION_H
diff --git a/linux-x64/clang/include/llvm/MCA/Pipeline.h b/linux-x64/clang/include/llvm/MCA/Pipeline.h
new file mode 100644
index 0000000..935033f
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Pipeline.h
@@ -0,0 +1,78 @@
+//===--------------------- Pipeline.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements an ordered container of stages that simulate the
+/// pipeline of a hardware backend.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_PIPELINE_H
+#define LLVM_MCA_PIPELINE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+class HWEventListener;
+
+/// A pipeline for a specific subtarget.
+///
+/// It emulates an out-of-order execution of instructions. Instructions are
+/// fetched from a MCInst sequence managed by an initial 'Fetch' stage.
+/// Instructions are firstly fetched, then dispatched to the schedulers, and
+/// then executed.
+///
+/// This class tracks the lifetime of an instruction from the moment where
+/// it gets dispatched to the schedulers, to the moment where it finishes
+/// executing and register writes are architecturally committed.
+/// In particular, it monitors changes in the state of every instruction
+/// in flight.
+///
+/// Instructions are executed in a loop of iterations. The number of iterations
+/// is defined by the SourceMgr object, which is managed by the initial stage
+/// of the instruction pipeline.
+///
+/// The Pipeline entry point is method 'run()' which executes cycles in a loop
+/// until there are no new instructions to dispatch, and every instruction
+/// has been retired.
+///
+/// Internally, the Pipeline collects statistical information in the form of
+/// histograms. For example, it tracks how the dispatch group size changes
+/// over time.
+class Pipeline {
+  Pipeline(const Pipeline &P) = delete;
+  Pipeline &operator=(const Pipeline &P) = delete;
+
+  /// An ordered list of stages that define this instruction pipeline.
+  SmallVector<std::unique_ptr<Stage>, 8> Stages;
+  /// Listeners notified on cycle-begin and cycle-end events.
+  std::set<HWEventListener *> Listeners;
+  /// Number of cycles simulated so far.
+  unsigned Cycles;
+
+  /// Executes a single simulated cycle across all stages.
+  Error runCycle();
+  /// Returns true while the pipeline still has outstanding work.
+  bool hasWorkToProcess();
+  void notifyCycleBegin();
+  void notifyCycleEnd();
+
+public:
+  Pipeline() : Cycles(0) {}
+  /// Appends a stage; stages run in the order they were appended.
+  void appendStage(std::unique_ptr<Stage> S);
+
+  /// Returns the total number of simulated cycles.
+  Expected<unsigned> run();
+
+  void addEventListener(HWEventListener *Listener);
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_PIPELINE_H
diff --git a/linux-x64/clang/include/llvm/MCA/SourceMgr.h b/linux-x64/clang/include/llvm/MCA/SourceMgr.h
new file mode 100644
index 0000000..dbe31db
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/SourceMgr.h
@@ -0,0 +1,56 @@
+//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements class SourceMgr. Class SourceMgr abstracts the input
+/// code sequence (a sequence of MCInst), and assings unique identifiers to
+/// every instruction in the sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SOURCEMGR_H
+#define LLVM_MCA_SOURCEMGR_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace mca {
+
+class Instruction;
+
+// A reference to an instruction in the input sequence, paired with its unique
+// (sequence-position) identifier.
+typedef std::pair<unsigned, const Instruction &> SourceRef;
+
+// NOTE(review): this header uses std::unique_ptr, std::pair and assert() but
+// only includes ArrayRef.h — it relies on transitive includes; consider adding
+// <memory>, <utility> and <cassert>.
+class SourceMgr {
+  using UniqueInst = std::unique_ptr<Instruction>;
+  ArrayRef<UniqueInst> Sequence;
+  // Absolute index of the next instruction across all iterations; the index
+  // into Sequence is (Current % Sequence.size()).
+  unsigned Current;
+  const unsigned Iterations;
+  static const unsigned DefaultIterations = 100;
+
+public:
+  // An iteration count of zero selects DefaultIterations (100).
+  SourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
+      : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
+
+  unsigned getNumIterations() const { return Iterations; }
+  unsigned size() const { return Sequence.size(); }
+  // True while instructions remain across the requested number of iterations.
+  bool hasNext() const { return Current < (Iterations * Sequence.size()); }
+  void updateNext() { ++Current; }
+
+  SourceRef peekNext() const {
+    assert(hasNext() && "Already at end of sequence!");
+    return SourceRef(Current, *Sequence[Current % Sequence.size()]);
+  }
+
+  using const_iterator = ArrayRef<UniqueInst>::const_iterator;
+  const_iterator begin() const { return Sequence.begin(); }
+  const_iterator end() const { return Sequence.end(); }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SOURCEMGR_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h b/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h
new file mode 100644
index 0000000..e39f03e
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h
@@ -0,0 +1,90 @@
+//===----------------------- DispatchStage.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file models the dispatch component of an instruction pipeline.
+///
+/// The DispatchStage is responsible for updating instruction dependencies
+/// and communicating to the simulated instruction scheduler that an instruction
+/// is ready to be scheduled for execution.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_DISPATCH_STAGE_H
+#define LLVM_MCA_DISPATCH_STAGE_H
+
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+// Implements the hardware dispatch logic.
+//
+// This class is responsible for the dispatch stage, in which instructions are
+// dispatched in groups to the Scheduler.  An instruction can be dispatched if
+// the following conditions are met:
+//  1) There are enough entries in the reorder buffer (see class
+//     RetireControlUnit) to write the opcodes associated with the instruction.
+//  2) There are enough physical registers to rename output register operands.
+//  3) There are enough entries available in the used buffered resource(s).
+//
+// The number of micro opcodes that can be dispatched in one cycle is limited by
+// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
+// processor resources are not available. Dispatch stall events are counted
+// during the entire execution of the code, and displayed by the performance
+// report when flag '-dispatch-stats' is specified.
+//
+// If the number of micro opcodes exceeds DispatchWidth, then the instruction
+// is dispatched in multiple cycles.
+class DispatchStage final : public Stage {
+  unsigned DispatchWidth;
+  // Dispatch slots (micro opcodes) still available in the current cycle.
+  unsigned AvailableEntries;
+  // Micro opcodes of a partially dispatched instruction still to be
+  // dispatched in later cycles (see the class comment about instructions
+  // whose uOp count exceeds DispatchWidth).
+  unsigned CarryOver;
+  // The instruction currently being dispatched across multiple cycles, if any.
+  InstRef CarriedOver;
+  const MCSubtargetInfo &STI;
+  RetireControlUnit &RCU;
+  RegisterFile &PRF;
+
+  // True if the RCU has reorder-buffer entries available for IR (condition 1
+  // in the class comment).
+  bool checkRCU(const InstRef &IR) const;
+  // True if the register file can rename the output operands of IR
+  // (condition 2 in the class comment).
+  bool checkPRF(const InstRef &IR) const;
+  bool canDispatch(const InstRef &IR) const;
+  Error dispatch(InstRef IR);
+
+  void notifyInstructionDispatched(const InstRef &IR,
+                                   ArrayRef<unsigned> UsedPhysRegs,
+                                   unsigned uOps) const;
+
+public:
+  // NOTE(review): the MRI parameter is currently unused by this constructor.
+  DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
+                unsigned MaxDispatchWidth, RetireControlUnit &R,
+                RegisterFile &F)
+      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+        CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
+
+  bool isAvailable(const InstRef &IR) const override;
+
+  // The dispatch logic internally doesn't buffer instructions. So there is
+  // never work to do at the beginning of every cycle.
+  bool hasWorkToComplete() const override { return false; }
+  Error cycleStart() override;
+  Error execute(InstRef &IR) override;
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_DISPATCH_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/EntryStage.h b/linux-x64/clang/include/llvm/MCA/Stages/EntryStage.h
new file mode 100644
index 0000000..59a2daf
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/EntryStage.h
@@ -0,0 +1,51 @@
+//===---------------------- EntryStage.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Entry stage of an instruction pipeline.  Its sole
+/// purpose in life is to pick instructions in sequence and move them to the
+/// next pipeline stage.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_ENTRY_STAGE_H
+#define LLVM_MCA_ENTRY_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class EntryStage final : public Stage {
+  InstRef CurrentInstruction;
+  SmallVector<std::unique_ptr<Instruction>, 16> Instructions;
+  SourceMgr &SM;
+  unsigned NumRetired;
+
+  // Updates the program counter, and sets 'CurrentInstruction'.
+  void getNextInstruction();
+
+  EntryStage(const EntryStage &Other) = delete;
+  EntryStage &operator=(const EntryStage &Other) = delete;
+
+public:
+  EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { }
+
+  bool isAvailable(const InstRef &IR) const override;
+  bool hasWorkToComplete() const override;
+  Error execute(InstRef &IR) override;
+  Error cycleStart() override;
+  Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_ENTRY_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/ExecuteStage.h b/linux-x64/clang/include/llvm/MCA/Stages/ExecuteStage.h
new file mode 100644
index 0000000..ec9eae0
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/ExecuteStage.h
@@ -0,0 +1,79 @@
+//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the execution stage of a default instruction pipeline.
+///
+/// The ExecuteStage is responsible for managing the hardware scheduler
+/// and issuing notifications that an instruction has been executed.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_EXECUTE_STAGE_H
+#define LLVM_MCA_EXECUTE_STAGE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class ExecuteStage final : public Stage {
+  Scheduler &HWS;
+
+  Error issueInstruction(InstRef &IR);
+
+  // Called at the beginning of each cycle to issue already dispatched
+  // instructions to the underlying pipelines.
+  Error issueReadyInstructions();
+
+  // Used to notify instructions eliminated at register renaming stage.
+  Error handleInstructionEliminated(InstRef &IR);
+
+  ExecuteStage(const ExecuteStage &Other) = delete;
+  ExecuteStage &operator=(const ExecuteStage &Other) = delete;
+
+public:
+  ExecuteStage(Scheduler &S) : Stage(), HWS(S) {}
+
+  // This stage works under the assumption that the Pipeline will eventually
+  // execute a retire stage. We don't need to check if pipelines and/or
+  // schedulers have instructions to process, because those instructions are
+  // also tracked by the retire control unit. That means,
+  // RetireControlUnit::hasWorkToComplete() is responsible for checking if there
+  // are still instructions in-flight in the out-of-order backend.
+  bool hasWorkToComplete() const override { return false; }
+  bool isAvailable(const InstRef &IR) const override;
+
+  // Notifies the scheduler that a new cycle just started.
+  //
+  // This method notifies the scheduler that a new cycle started.
+  // This method is also responsible for notifying listeners about instructions
+  // state changes, and processor resources freed by the scheduler.
+  // Instructions that transitioned to the 'Executed' state are automatically
+  // moved to the next stage (i.e. RetireStage).
+  Error cycleStart() override;
+  Error execute(InstRef &IR) override;
+
+  void notifyInstructionIssued(
+      const InstRef &IR,
+      MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
+  void notifyInstructionExecuted(const InstRef &IR) const;
+  void notifyInstructionReady(const InstRef &IR) const;
+  void notifyResourceAvailable(const ResourceRef &RR) const;
+
+  // Notify listeners that buffered resources have been consumed or freed.
+  void notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_EXECUTE_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/InstructionTables.h b/linux-x64/clang/include/llvm/MCA/Stages/InstructionTables.h
new file mode 100644
index 0000000..4b463c9
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/InstructionTables.h
@@ -0,0 +1,45 @@
+//===--------------------- InstructionTables.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a custom stage to generate instruction tables.
+/// See the description of command-line flag -instruction-tables in
+/// docs/CommandGuide/llvm-mca.rst
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRUCTIONTABLES_H
+#define LLVM_MCA_INSTRUCTIONTABLES_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+class InstructionTables final : public Stage {
+  const MCSchedModel &SM;
+  SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
+  SmallVector<uint64_t, 8> Masks;
+
+public:
+  InstructionTables(const MCSchedModel &Model)
+      : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) {
+    computeProcResourceMasks(Model, Masks);
+  }
+
+  bool hasWorkToComplete() const override { return false; }
+  Error execute(InstRef &IR) override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRUCTIONTABLES_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h b/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h
new file mode 100644
index 0000000..08c216a
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h
@@ -0,0 +1,47 @@
+//===---------------------- RetireStage.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the retire stage of a default instruction pipeline.
+/// The RetireStage represents the process logic that interacts with the
+/// simulated RetireControlUnit hardware.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RETIRE_STAGE_H
+#define LLVM_MCA_RETIRE_STAGE_H
+
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class RetireStage final : public Stage {
+  // Owner will go away when we move listeners/eventing to the stages.
+  RetireControlUnit &RCU;
+  RegisterFile &PRF;
+
+  RetireStage(const RetireStage &Other) = delete;
+  RetireStage &operator=(const RetireStage &Other) = delete;
+
+public:
+  RetireStage(RetireControlUnit &R, RegisterFile &F)
+      : Stage(), RCU(R), PRF(F) {}
+
+  bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
+  Error cycleStart() override;
+  Error execute(InstRef &IR) override;
+  void notifyInstructionRetired(const InstRef &IR) const;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RETIRE_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/Stage.h b/linux-x64/clang/include/llvm/MCA/Stages/Stage.h
new file mode 100644
index 0000000..46b242c
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Stages/Stage.h
@@ -0,0 +1,87 @@
+//===---------------------- Stage.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a stage.
+/// A chain of stages compose an instruction pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_STAGE_H
+#define LLVM_MCA_STAGE_H
+
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Error.h"
+#include <set>
+
+namespace llvm {
+namespace mca {
+
+class InstRef;
+
+class Stage {
+  Stage *NextInSequence;
+  std::set<HWEventListener *> Listeners;
+
+  Stage(const Stage &Other) = delete;
+  Stage &operator=(const Stage &Other) = delete;
+
+protected:
+  const std::set<HWEventListener *> &getListeners() const { return Listeners; }
+
+public:
+  Stage() : NextInSequence(nullptr) {}
+  virtual ~Stage();
+
+  /// Returns true if it can execute IR during this cycle.
+  virtual bool isAvailable(const InstRef &IR) const { return true; }
+
+  /// Returns true if some instructions are still executing this stage.
+  virtual bool hasWorkToComplete() const = 0;
+
+  /// Called once at the start of each cycle.  This can be used as a setup
+  /// phase to prepare for the executions during the cycle.
+  virtual Error cycleStart() { return ErrorSuccess(); }
+
+  /// Called once at the end of each cycle.
+  virtual Error cycleEnd() { return ErrorSuccess(); }
+
+  /// The primary action that this stage performs on instruction IR.
+  virtual Error execute(InstRef &IR) = 0;
+
+  void setNextInSequence(Stage *NextStage) {
+    assert(!NextInSequence && "This stage already has a NextInSequence!");
+    NextInSequence = NextStage;
+  }
+
+  bool checkNextStage(const InstRef &IR) const {
+    return NextInSequence && NextInSequence->isAvailable(IR);
+  }
+
+  /// Called when an instruction is ready to move to the next pipeline stage.
+  ///
+  /// Stages are responsible for moving instructions to their immediate
+  /// successor stages.
+  Error moveToTheNextStage(InstRef &IR) {
+    assert(checkNextStage(IR) && "Next stage is not ready!");
+    return NextInSequence->execute(IR);
+  }
+
+  /// Add a listener to receive callbacks during the execution of this stage.
+  void addListener(HWEventListener *Listener);
+
+  /// Notify listeners of a particular hardware event.
+  template <typename EventT> void notifyEvent(const EventT &Event) const {
+    for (HWEventListener *Listener : Listeners)
+      Listener->onEvent(Event);
+  }
+};
+
+} // namespace mca
+} // namespace llvm
+#endif // LLVM_MCA_STAGE_H
diff --git a/linux-x64/clang/include/llvm/MCA/Support.h b/linux-x64/clang/include/llvm/MCA/Support.h
new file mode 100644
index 0000000..fc36ed4
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/Support.h
@@ -0,0 +1,107 @@
+//===--------------------- Support.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Helper functions used by various pipeline components.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SUPPORT_H
+#define LLVM_MCA_SUPPORT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+template <typename T>
+class InstructionError : public ErrorInfo<InstructionError<T>> {
+public:
+  static char ID;
+  std::string Message;
+  const T &Inst;
+
+  InstructionError(std::string M, const T &MCI)
+      : Message(std::move(M)), Inst(MCI) {}
+
+  void log(raw_ostream &OS) const override { OS << Message; }
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+};
+
+template <typename T> char InstructionError<T>::ID;
+
+/// This class represents the number of cycles per resource (fractions of
+/// cycles).  That quantity is managed here as a ratio, and accessed via the
+/// double cast-operator below.  The two quantities, number of cycles and
+/// number of resources, are kept separate.  This is used by the
+/// ResourcePressureView to calculate the average resource cycles
+/// per instruction/iteration.
+class ResourceCycles {
+  unsigned Numerator, Denominator;
+
+public:
+  ResourceCycles() : Numerator(0), Denominator(1) {}
+  ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1)
+      : Numerator(Cycles), Denominator(ResourceUnits) {}
+
+  operator double() const {
+    assert(Denominator && "Invalid denominator (must be non-zero).");
+    return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
+  }
+
+  unsigned getNumerator() const { return Numerator; }
+  unsigned getDenominator() const { return Denominator; }
+
+  // Add the components of RHS to this instance.  Instead of calculating
+  // the final value here, we keep track of the numerator and denominator
+  // separately, to reduce floating point error.
+  ResourceCycles &operator+=(const ResourceCycles &RHS);
+};
+
+/// Populates vector Masks with processor resource masks.
+///
+/// The number of bits set in a mask depends on the processor resource type.
+/// Each processor resource mask has at least one bit set. For groups, the
+/// number of bits set in the mask is equal to the cardinality of the group plus
+/// one. Excluding the most significant bit, the remaining bits in the mask
+/// identify processor resources that are part of the group.
+///
+/// Example:
+///
+///  ResourceA  -- Mask: 0b001
+///  ResourceB  -- Mask: 0b010
+///  ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
+///
+/// ResourceAB is a processor resource group containing ResourceA and ResourceB.
+/// Each resource mask uniquely identifies a resource; both ResourceA and
+/// ResourceB only have one bit set.
+/// ResourceAB is a group; excluding the most significant bit in the mask, the
+/// remaining bits identify the composition of the group.
+///
+/// Resource masks are used by the ResourceManager to solve set membership
+/// problems with simple bit manipulation operations.
+void computeProcResourceMasks(const MCSchedModel &SM,
+                              MutableArrayRef<uint64_t> Masks);
+
+/// Compute the reciprocal block throughput from a set of processor resource
+/// cycles. The reciprocal block throughput is computed as the MAX between:
+///  - NumMicroOps / DispatchWidth
+///  - ProcResourceCycles / #ProcResourceUnits  (for every consumed resource).
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+                               unsigned NumMicroOps,
+                               ArrayRef<unsigned> ProcResourceUsage);
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SUPPORT_H