Update prebuilt Clang to r416183b from Android.
https://android.googlesource.com/platform/prebuilts/clang/host/
linux-x86/+/06a71ddac05c22edb2d10b590e1769b3f8619bef
clang 12.0.5 (based on r416183b) from build 7284624.
Change-Id: I277a316abcf47307562d8b748b84870f31a72866
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/linux-x64/clang/include/llvm/MCA/CodeEmitter.h b/linux-x64/clang/include/llvm/MCA/CodeEmitter.h
new file mode 100644
index 0000000..edbadcc
--- /dev/null
+++ b/linux-x64/clang/include/llvm/MCA/CodeEmitter.h
@@ -0,0 +1,69 @@
+//===--------------------- CodeEmitter.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A utility class used to compute instruction encodings. It buffers encodings
+/// for later usage. It exposes a simple API to compute and get the encodings as
+/// StringRef.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_CODEEMITTER_H
+#define LLVM_MCA_CODEEMITTER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <string>
+
+namespace llvm {
+namespace mca {
+
+/// A utility class used to compute instruction encodings for a code region.
+///
+/// It provides a simple API to compute and return instruction encodings as
+/// strings. Encodings are cached internally for later usage.
+class CodeEmitter {
+ const MCSubtargetInfo &STI;
+ const MCAsmBackend &MAB;
+ const MCCodeEmitter &MCE;
+
+ SmallString<256> Code;
+ raw_svector_ostream VecOS;
+ ArrayRef<MCInst> Sequence;
+
+ // An EncodingInfo pair stores <base, length> information. Base (i.e. first)
+ // is an index into `Code`. Length (i.e. second) is the encoding size.
+ using EncodingInfo = std::pair<unsigned, unsigned>;
+
+ // A cache of encodings, with one entry per instruction in `Sequence`.
+ SmallVector<EncodingInfo, 16> Encodings;
+
+ EncodingInfo getOrCreateEncodingInfo(unsigned MCID);
+
+public:
+ CodeEmitter(const MCSubtargetInfo &ST, const MCAsmBackend &AB,
+ const MCCodeEmitter &CE, ArrayRef<MCInst> S)
+ : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S),
+ Encodings(S.size()) {}
+
+ StringRef getEncoding(unsigned MCID) {
+ EncodingInfo EI = getOrCreateEncodingInfo(MCID);
+ return StringRef(&Code[EI.first], EI.second);
+ }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_CODEEMITTER_H
diff --git a/linux-x64/clang/include/llvm/MCA/Context.h b/linux-x64/clang/include/llvm/MCA/Context.h
index 503d780..af3cb8e 100644
--- a/linux-x64/clang/include/llvm/MCA/Context.h
+++ b/linux-x64/clang/include/llvm/MCA/Context.h
@@ -20,7 +20,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
-#include "llvm/MCA/InstrBuilder.h"
#include "llvm/MCA/Pipeline.h"
#include "llvm/MCA/SourceMgr.h"
#include <memory>
@@ -58,6 +57,9 @@
Context(const Context &C) = delete;
Context &operator=(const Context &C) = delete;
+ const MCRegisterInfo &getMCRegisterInfo() const { return MRI; }
+ const MCSubtargetInfo &getMCSubtargetInfo() const { return STI; }
+
void addHardwareUnit(std::unique_ptr<HardwareUnit> H) {
Hardware.push_back(std::move(H));
}
@@ -65,7 +67,6 @@
/// Construct a basic pipeline for simulating an out-of-order pipeline.
/// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages.
std::unique_ptr<Pipeline> createDefaultPipeline(const PipelineOptions &Opts,
- InstrBuilder &IB,
SourceMgr &SrcMgr);
};
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h
index ae9a49c..2f9b4ba 100644
--- a/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -24,8 +24,6 @@
namespace llvm {
namespace mca {
-class Scheduler;
-
/// A node of a memory dependency graph. A MemoryGroup describes a set of
/// instructions with same memory dependencies.
///
@@ -42,7 +40,10 @@
unsigned NumInstructions;
unsigned NumExecuting;
unsigned NumExecuted;
- SmallVector<MemoryGroup *, 4> Succ;
+ // Successors that are in an order dependency with this group.
+ SmallVector<MemoryGroup *, 4> OrderSucc;
+ // Successors that are in a data dependency with this group.
+ SmallVector<MemoryGroup *, 4> DataSucc;
CriticalDependency CriticalPredecessor;
InstRef CriticalMemoryInstruction;
@@ -57,8 +58,9 @@
NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
MemoryGroup(MemoryGroup &&) = default;
- ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
- unsigned getNumSuccessors() const { return Succ.size(); }
+ size_t getNumSuccessors() const {
+ return OrderSucc.size() + DataSucc.size();
+ }
unsigned getNumPredecessors() const { return NumPredecessors; }
unsigned getNumExecutingPredecessors() const {
return NumExecutingPredecessors;
@@ -77,12 +79,22 @@
return CriticalPredecessor;
}
- void addSuccessor(MemoryGroup *Group) {
+ void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
+ // Do not need to add a dependency if there is no data
+ // dependency and all instructions from this group have been
+ // issued already.
+ if (!IsDataDependent && isExecuting())
+ return;
+
Group->NumPredecessors++;
assert(!isExecuted() && "Should have been removed!");
if (isExecuting())
- Group->onGroupIssued(CriticalMemoryInstruction);
- Succ.emplace_back(Group);
+ Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);
+
+ if (IsDataDependent)
+ DataSucc.emplace_back(Group);
+ else
+ OrderSucc.emplace_back(Group);
}
bool isWaiting() const {
@@ -100,10 +112,13 @@
}
bool isExecuted() const { return NumInstructions == NumExecuted; }
- void onGroupIssued(const InstRef &IR) {
+ void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
assert(!isReady() && "Unexpected group-start event!");
NumExecutingPredecessors++;
+ if (!ShouldUpdateCriticalDep)
+ return;
+
unsigned Cycles = IR.getInstruction()->getCyclesLeft();
if (CriticalPredecessor.Cycles < Cycles) {
CriticalPredecessor.IID = IR.getSourceIndex();
@@ -135,8 +150,14 @@
return;
// Notify successors that this group started execution.
- for (MemoryGroup *MG : Succ)
- MG->onGroupIssued(CriticalMemoryInstruction);
+ for (MemoryGroup *MG : OrderSucc) {
+ MG->onGroupIssued(CriticalMemoryInstruction, false);
+ // Release the order dependency with this group.
+ MG->onGroupExecuted();
+ }
+
+ for (MemoryGroup *MG : DataSucc)
+ MG->onGroupIssued(CriticalMemoryInstruction, true);
}
void onInstructionExecuted() {
@@ -147,8 +168,8 @@
if (!isExecuted())
return;
- // Notify successors that this group has finished execution.
- for (MemoryGroup *MG : Succ)
+ // Notify data dependent successors that this group has finished execution.
+ for (MemoryGroup *MG : DataSucc)
MG->onGroupExecuted();
}
@@ -209,8 +230,10 @@
unsigned getUsedLQEntries() const { return UsedLQEntries; }
unsigned getUsedSQEntries() const { return UsedSQEntries; }
- unsigned assignLQSlot() { return UsedLQEntries++; }
- unsigned assignSQSlot() { return UsedSQEntries++; }
+ void acquireLQSlot() { ++UsedLQEntries; }
+ void acquireSQSlot() { ++UsedSQEntries; }
+ void releaseLQSlot() { --UsedLQEntries; }
+ void releaseSQSlot() { --UsedSQEntries; }
bool assumeNoAlias() const { return NoAlias; }
@@ -285,13 +308,18 @@
unsigned createMemoryGroup() {
Groups.insert(
- std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
+ std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
return NextGroupID++;
}
- // Instruction executed event handlers.
virtual void onInstructionExecuted(const InstRef &IR);
+ // Loads are tracked by the LDQ (load queue) from dispatch until completion.
+ // Stores are tracked by the STQ (store queue) from dispatch until commitment.
+ // By default we conservatively assume that the LDQ receives a load at
+ // dispatch. Loads leave the LDQ at retirement stage.
+ virtual void onInstructionRetired(const InstRef &IR);
+
virtual void onInstructionIssued(const InstRef &IR) {
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
Groups[GroupID]->onInstructionIssued(IR);
@@ -407,6 +435,7 @@
unsigned CurrentLoadGroupID;
unsigned CurrentLoadBarrierGroupID;
unsigned CurrentStoreGroupID;
+ unsigned CurrentStoreBarrierGroupID;
public:
LSUnit(const MCSchedModel &SM)
@@ -415,7 +444,8 @@
: LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
: LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
- CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}
+ CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
+ CurrentStoreBarrierGroupID(0) {}
/// Returns LSU_AVAILABLE if there are enough load/store queue entries to
/// accomodate instruction IR.
@@ -436,9 +466,6 @@
/// 6. A store has to wait until an older store barrier is fully executed.
unsigned dispatch(const InstRef &IR) override;
- // FIXME: For simplicity, we optimistically assume a similar behavior for
- // store instructions. In practice, store operations don't tend to leave the
- // store queue until they reach the 'Retired' stage (See PR39830).
void onInstructionExecuted(const InstRef &IR) override;
};
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h
index 3650632..e8ca348 100644
--- a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -22,7 +22,6 @@
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
-#include "llvm/Support/Error.h"
namespace llvm {
namespace mca {
@@ -84,7 +83,7 @@
// the target name).
//
// Users can limit the number of physical registers that are available in
- // regsiter file #0 specifying command line flag `-register-file-size=<uint>`.
+ // register file #0 specifying command line flag `-register-file-size=<uint>`.
SmallVector<RegisterMappingTracker, 4> RegisterFiles;
// This type is used to propagate information about the owner of a register,
@@ -220,7 +219,7 @@
//
// Current implementation can simulate up to 32 register files (including the
// special register file at index #0).
- unsigned isAvailable(ArrayRef<unsigned> Regs) const;
+ unsigned isAvailable(ArrayRef<MCPhysReg> Regs) const;
// Returns the number of PRFs implemented by this processor.
unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 2f91185..b6d4e34 100644
--- a/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -15,7 +15,6 @@
#ifndef LLVM_MCA_RESOURCE_MANAGER_H
#define LLVM_MCA_RESOURCE_MANAGER_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
@@ -33,8 +32,7 @@
/// with a buffer size of -1 is always available if it is not reserved.
///
/// Values of type ResourceStateEvent are returned by method
-/// ResourceState::isBufferAvailable(), which is used to query the internal
-/// state of a resource.
+/// ResourceManager::canBeDispatched().
///
/// The naming convention for resource state events is:
/// * Event names start with prefix RS_
@@ -263,16 +261,26 @@
/// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
ResourceStateEvent isBufferAvailable() const;
- /// Reserve a slot in the buffer.
- void reserveBuffer() {
- if (AvailableSlots)
- AvailableSlots--;
+ /// Reserve a buffer slot.
+ ///
+ /// Returns true if the buffer still has free slots after this reservation.
+ /// It always returns true if BufferSize is not positive (i.e. zero or less).
+ bool reserveBuffer() {
+ if (BufferSize <= 0)
+ return true;
+
+ --AvailableSlots;
+ assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
+ return AvailableSlots;
}
- /// Release a slot in the buffer.
+ /// Releases a slot in the buffer.
void releaseBuffer() {
- if (BufferSize > 0)
- AvailableSlots++;
+ // Ignore dispatch hazards or invalid buffer sizes.
+ if (BufferSize <= 0)
+ return;
+
+ ++AvailableSlots;
assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
}
@@ -351,9 +359,16 @@
// Set of processor resource units that are available during this cycle.
uint64_t AvailableProcResUnits;
- // Set of processor resource groups that are currently reserved.
+ // Set of processor resources that are currently reserved.
uint64_t ReservedResourceGroups;
+ // Set of available scheduler buffer resources. This is used internally to
+ // speed up `canBeDispatched()` queries.
+ uint64_t AvailableBuffers;
+
+ // Set of dispatch hazard buffer resources that are currently unavailable.
+ uint64_t ReservedBuffers;
+
// Returns the actual resource unit that will be used.
ResourceRef selectPipe(uint64_t ResourceID);
@@ -382,17 +397,20 @@
// Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
// there are enough available slots in the buffers.
- ResourceStateEvent canBeDispatched(ArrayRef<uint64_t> Buffers) const;
+ ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const;
// Return the processor resource identifier associated to this Mask.
unsigned resolveResourceMask(uint64_t Mask) const;
- // Consume a slot in every buffered resource from array 'Buffers'. Resource
- // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
- void reserveBuffers(ArrayRef<uint64_t> Buffers);
+ // Acquires a slot from every buffered resource in mask `ConsumedBuffers`.
+ // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
+ void reserveBuffers(uint64_t ConsumedBuffers);
- // Release buffer entries previously allocated by method reserveBuffers.
- void releaseBuffers(ArrayRef<uint64_t> Buffers);
+ // Releases a slot from every buffered resource in mask `ConsumedBuffers`.
+ // ConsumedBuffers is a bitmask of previously acquired buffers (using method
+ // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are
+ // not automatically unreserved by this method.
+ void releaseBuffers(uint64_t ConsumedBuffers);
// Reserve a processor resource. A reserved resource is not available for
// instruction issue until it is released.
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
index 0629014..acbd454 100644
--- a/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -57,34 +57,43 @@
private:
unsigned NextAvailableSlotIdx;
unsigned CurrentInstructionSlotIdx;
- unsigned AvailableSlots;
+ unsigned NumROBEntries;
+ unsigned AvailableEntries;
unsigned MaxRetirePerCycle; // 0 means no limit.
std::vector<RUToken> Queue;
-public:
- RetireControlUnit(const MCSchedModel &SM);
-
- bool isEmpty() const { return AvailableSlots == Queue.size(); }
- bool isAvailable(unsigned Quantity = 1) const {
+ unsigned normalizeQuantity(unsigned Quantity) const {
// Some instructions may declare a number of uOps which exceeds the size
// of the reorder buffer. To avoid problems, cap the amount of slots to
// the size of the reorder buffer.
- Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+ Quantity = std::min(Quantity, NumROBEntries);
// Further normalize the number of micro opcodes for instructions that
// declare zero opcodes. This should match the behavior of method
// reserveSlot().
- Quantity = std::max(Quantity, 1U);
- return AvailableSlots >= Quantity;
+ return std::max(Quantity, 1U);
+ }
+
+ unsigned computeNextSlotIdx() const;
+
+public:
+ RetireControlUnit(const MCSchedModel &SM);
+
+ bool isEmpty() const { return AvailableEntries == NumROBEntries; }
+
+ bool isAvailable(unsigned Quantity = 1) const {
+ return AvailableEntries >= normalizeQuantity(Quantity);
}
unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; }
- // Reserves a number of slots, and returns a new token.
- unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps);
+ // Reserves a number of slots, and returns a new token reference.
+ unsigned dispatch(const InstRef &IS);
// Return the current token from the RCU's circular token queue.
- const RUToken &peekCurrentToken() const;
+ const RUToken &getCurrentToken() const;
+
+ const RUToken &peekNextToken() const;
// Advance the pointer to the next token in the circular token queue.
void consumeCurrentToken();
diff --git a/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h b/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h
index 27beb84..0293364 100644
--- a/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/linux-x64/clang/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -68,7 +68,7 @@
/// instructions from the dispatch stage, until the write-back stage.
///
class Scheduler : public HardwareUnit {
- LSUnit &LSU;
+ LSUnitBase &LSU;
// Instruction selection strategy for this Scheduler.
std::unique_ptr<SchedulerStrategy> Strategy;
@@ -154,15 +154,15 @@
bool promoteToPendingSet(SmallVectorImpl<InstRef> &Pending);
public:
- Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
+ Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu)
: Scheduler(Model, Lsu, nullptr) {}
- Scheduler(const MCSchedModel &Model, LSUnit &Lsu,
+ Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu,
std::unique_ptr<SchedulerStrategy> SelectStrategy)
- : Scheduler(make_unique<ResourceManager>(Model), Lsu,
+ : Scheduler(std::make_unique<ResourceManager>(Model), Lsu,
std::move(SelectStrategy)) {}
- Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
+ Scheduler(std::unique_ptr<ResourceManager> RM, LSUnitBase &Lsu,
std::unique_ptr<SchedulerStrategy> SelectStrategy)
: LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0),
NumDispatchedToThePendingSet(0), HadTokenStall(false) {
@@ -228,6 +228,9 @@
SmallVectorImpl<InstRef> &Ready);
/// Convert a resource mask into a valid llvm processor resource identifier.
+ ///
+ /// Only the most significant bit of the Mask is used by this method to
+ /// identify the processor resource.
unsigned getResourceID(uint64_t Mask) const {
return Resources->resolveResourceMask(Mask);
}
@@ -264,9 +267,9 @@
// This routine performs a sanity check. This routine should only be called
// when we know that 'IR' is not in the scheduler's instruction queues.
void sanityCheck(const InstRef &IR) const {
- assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!");
- assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!");
- assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!");
+ assert(!is_contained(WaitSet, IR) && "Already in the wait set!");
+ assert(!is_contained(ReadySet, IR) && "Already in the ready set!");
+ assert(!is_contained(IssuedSet, IR) && "Already executing!");
}
#endif // !NDEBUG
};
diff --git a/linux-x64/clang/include/llvm/MCA/Instruction.h b/linux-x64/clang/include/llvm/MCA/Instruction.h
index d4d3f22..c97cb46 100644
--- a/linux-x64/clang/include/llvm/MCA/Instruction.h
+++ b/linux-x64/clang/include/llvm/MCA/Instruction.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCRegister.h" // definition of MCPhysReg.
#include "llvm/Support/MathExtras.h"
#ifndef NDEBUG
@@ -42,7 +43,7 @@
unsigned Latency;
// This field is set to a value different than zero only if this
// is an implicit definition.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Instruction itineraries would set this field to the SchedClass ID.
// Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry
// element associated to this write.
@@ -70,7 +71,7 @@
// uses always come first in the sequence of uses.
unsigned UseIndex;
// This field is only set if this is an implicit read.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Scheduling Class Index. It is used to query the scheduling model for the
// MCSchedClassDesc object.
unsigned SchedClassID;
@@ -85,7 +86,7 @@
/// Field RegID is set to the invalid register for memory dependencies.
struct CriticalDependency {
unsigned IID;
- unsigned RegID;
+ MCPhysReg RegID;
unsigned Cycles;
};
@@ -106,7 +107,7 @@
// to speedup queries on the register file.
// For implicit writes, this field always matches the value of
// field RegisterID from WD.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Physical register file that serves register RegisterID.
unsigned PRFID;
@@ -146,7 +147,7 @@
SmallVector<std::pair<ReadState *, int>, 4> Users;
public:
- WriteState(const WriteDescriptor &Desc, unsigned RegID,
+ WriteState(const WriteDescriptor &Desc, MCPhysReg RegID,
bool clearsSuperRegs = false, bool writesZero = false)
: WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
@@ -158,7 +159,7 @@
int getCyclesLeft() const { return CyclesLeft; }
unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
- unsigned getRegisterID() const { return RegisterID; }
+ MCPhysReg getRegisterID() const { return RegisterID; }
unsigned getRegisterFileID() const { return PRFID; }
unsigned getLatency() const { return WD->Latency; }
unsigned getDependentWriteCyclesLeft() const {
@@ -200,7 +201,7 @@
}
void setDependentWrite(const WriteState *Other) { DependentWrite = Other; }
- void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
+ void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles);
void setWriteZero() { WritesZero = true; }
void setEliminated() {
assert(Users.empty() && "Write is in an inconsistent state.");
@@ -226,7 +227,7 @@
class ReadState {
const ReadDescriptor *RD;
// Physical register identified associated to this read.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Physical register file that serves register RegisterID.
unsigned PRFID;
// Number of writes that contribute to the definition of RegisterID.
@@ -253,14 +254,14 @@
bool IndependentFromDef;
public:
- ReadState(const ReadDescriptor &Desc, unsigned RegID)
+ ReadState(const ReadDescriptor &Desc, MCPhysReg RegID)
: RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(), IsReady(true),
IsZero(false), IndependentFromDef(false) {}
const ReadDescriptor &getDescriptor() const { return *RD; }
unsigned getSchedClass() const { return RD->SchedClassID; }
- unsigned getRegisterID() const { return RegisterID; }
+ MCPhysReg getRegisterID() const { return RegisterID; }
unsigned getRegisterFileID() const { return PRFID; }
const CriticalDependency &getCriticalRegDep() const { return CRD; }
@@ -272,7 +273,7 @@
void setIndependentFromDef() { IndependentFromDef = true; }
void cycleEvent();
- void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
+ void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles);
void setDependentWrites(unsigned Writes) {
DependentWrites = Writes;
IsReady = !Writes;
@@ -352,11 +353,14 @@
// reports the number of "consumed cycles".
SmallVector<std::pair<uint64_t, ResourceUsage>, 4> Resources;
- // A list of buffered resources consumed by this instruction.
- SmallVector<uint64_t, 4> Buffers;
+ // A bitmask of used hardware buffers.
+ uint64_t UsedBuffers;
- unsigned UsedProcResUnits;
- unsigned UsedProcResGroups;
+ // A bitmask of used processor resource units.
+ uint64_t UsedProcResUnits;
+
+ // A bitmask of used processor resource groups.
+ uint64_t UsedProcResGroups;
unsigned MaxLatency;
// Number of MicroOps for this instruction.
@@ -414,6 +418,7 @@
const InstrDesc &getDesc() const { return Desc; }
unsigned getLatency() const { return Desc.MaxLatency; }
+ unsigned getNumMicroOps() const { return Desc.NumMicroOps; }
bool hasDependentUsers() const {
return any_of(Defs,
@@ -463,6 +468,12 @@
// operation.
unsigned LSUTokenID;
+ // A resource mask which identifies buffered resources consumed by this
+ // instruction at dispatch stage. In the absence of macro-fusion, this value
+ // should always match the value of field `UsedBuffers` from the instruction
+ // descriptor (see field InstrBase::Desc).
+ uint64_t UsedBuffers;
+
// Critical register dependency.
CriticalDependency CriticalRegDep;
@@ -480,12 +491,18 @@
public:
Instruction(const InstrDesc &D)
: InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
- RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(),
- CriticalResourceMask(0), IsEliminated(false) {}
+ RCUTokenID(0), LSUTokenID(0), UsedBuffers(D.UsedBuffers),
+ CriticalRegDep(), CriticalMemDep(), CriticalResourceMask(0),
+ IsEliminated(false) {}
unsigned getRCUTokenID() const { return RCUTokenID; }
unsigned getLSUTokenID() const { return LSUTokenID; }
void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
+
+ uint64_t getUsedBuffers() const { return UsedBuffers; }
+ void setUsedBuffers(uint64_t Mask) { UsedBuffers = Mask; }
+ void clearUsedBuffers() { UsedBuffers = 0ULL; }
+
int getCyclesLeft() const { return CyclesLeft; }
// Transition to the dispatch stage, and assign a RCUToken to this
diff --git a/linux-x64/clang/include/llvm/MCA/Pipeline.h b/linux-x64/clang/include/llvm/MCA/Pipeline.h
index 935033f..0ac988c 100644
--- a/linux-x64/clang/include/llvm/MCA/Pipeline.h
+++ b/linux-x64/clang/include/llvm/MCA/Pipeline.h
@@ -15,8 +15,6 @@
#ifndef LLVM_MCA_PIPELINE_H
#define LLVM_MCA_PIPELINE_H
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MCA/HardwareUnits/Scheduler.h"
#include "llvm/MCA/Stages/Stage.h"
#include "llvm/Support/Error.h"
diff --git a/linux-x64/clang/include/llvm/MCA/SourceMgr.h b/linux-x64/clang/include/llvm/MCA/SourceMgr.h
index dbe31db..e844171 100644
--- a/linux-x64/clang/include/llvm/MCA/SourceMgr.h
+++ b/linux-x64/clang/include/llvm/MCA/SourceMgr.h
@@ -16,12 +16,13 @@
#define LLVM_MCA_SOURCEMGR_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/Instruction.h"
namespace llvm {
namespace mca {
-class Instruction;
-
+// MSVC >= 19.15, < 19.20 need to see the definition of class Instruction to
+// prevent compiler error C2139 about intrinsic type trait '__is_assignable'.
typedef std::pair<unsigned, const Instruction &> SourceRef;
class SourceMgr {
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h b/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h
index d80eded..597f731 100644
--- a/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h
+++ b/linux-x64/clang/include/llvm/MCA/Stages/DispatchStage.h
@@ -20,7 +20,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MCA/HWEventListener.h"
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/MCA/Instruction.h"
diff --git a/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h b/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h
index 08c216a..f471368 100644
--- a/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h
+++ b/linux-x64/clang/include/llvm/MCA/Stages/RetireStage.h
@@ -16,6 +16,7 @@
#ifndef LLVM_MCA_RETIRE_STAGE_H
#define LLVM_MCA_RETIRE_STAGE_H
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/MCA/Stages/Stage.h"
@@ -27,13 +28,14 @@
// Owner will go away when we move listeners/eventing to the stages.
RetireControlUnit &RCU;
RegisterFile &PRF;
+ LSUnitBase &LSU;
RetireStage(const RetireStage &Other) = delete;
RetireStage &operator=(const RetireStage &Other) = delete;
public:
- RetireStage(RetireControlUnit &R, RegisterFile &F)
- : Stage(), RCU(R), PRF(F) {}
+ RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
+ : Stage(), RCU(R), PRF(F), LSU(LS) {}
bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
Error cycleStart() override;