Update clang to r339409. Change-Id: I800772d2d838223be1f6b40d490c4591b937fca2

commit: cdfccccb1ea3e050b07dc59f87e214ee979da2ae [log] [tgz]
author: Andrew Scull <ascull@google.com> Fri Oct 05 20:58:37 2018 +0100
committer: Andrew Scull <ascull@google.com> Fri Oct 05 21:44:43 2018 +0100
tree: 819b945b890b8bfabe7e3c6e29c75e0dba88961d
parent: 5e1ddfae19f7052e9e27b1bf2c2d8ea3c9bcf5b0 [diff] [blame]
diff --git a/linux-x64/clang/include/llvm/MC/MCSchedule.h b/linux-x64/clang/include/llvm/MC/MCSchedule.h
index 62f8efd..f2f1dfb 100644
--- a/linux-x64/clang/include/llvm/MC/MCSchedule.h
+++ b/linux-x64/clang/include/llvm/MC/MCSchedule.h

@@ -16,6 +16,7 @@
 #define LLVM_MC_MCSCHEDULE_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
 
@@ -23,6 +24,9 @@
 
 struct InstrItinerary;
 class MCSubtargetInfo;
+class MCInstrInfo;
+class MCInst;
+class InstrItineraryData;
 
 /// Define a kind of processor resource that will be modeled by the scheduler.
 struct MCProcResourceDesc {
@@ -128,6 +132,64 @@
   }
 };
 
+/// Specify the cost of a register definition in terms of number of physical
+/// register allocated at register renaming stage. For example, AMD Jaguar.
+/// natively supports 128-bit data types, and operations on 256-bit registers
+/// (i.e. YMM registers) are internally split into two COPs (complex operations)
+/// and each COP updates a physical register. Basically, on Jaguar, a YMM
+/// register write effectively consumes two physical registers. That means,
+/// the cost of a YMM write in the BtVer2 model is 2.
+struct MCRegisterCostEntry {
+  unsigned RegisterClassID;
+  unsigned Cost;
+};
+
+/// A register file descriptor.
+///
+/// This struct allows to describe processor register files. In particular, it
+/// helps describing the size of the register file, as well as the cost of
+/// allocating a register file at register renaming stage.
+/// FIXME: this struct can be extended to provide information about the number
+/// of read/write ports to the register file.  A value of zero for field
+/// 'NumPhysRegs' means: this register file has an unbounded number of physical
+/// registers.
+struct MCRegisterFileDesc {
+  const char *Name;
+  uint16_t NumPhysRegs;
+  uint16_t NumRegisterCostEntries;
+  // Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
+  uint16_t RegisterCostEntryIdx;
+};
+
+/// Provide extra details about the machine processor.
+///
+/// This is a collection of "optional" processor information that is not
+/// normally used by the LLVM machine schedulers, but that can be consumed by
+/// external tools like llvm-mca to improve the quality of the peformance
+/// analysis.
+struct MCExtraProcessorInfo {
+  // Actual size of the reorder buffer in hardware.
+  unsigned ReorderBufferSize;
+  // Number of instructions retired per cycle.
+  unsigned MaxRetirePerCycle;
+  const MCRegisterFileDesc *RegisterFiles;
+  unsigned NumRegisterFiles;
+  const MCRegisterCostEntry *RegisterCostTable;
+  unsigned NumRegisterCostEntries;
+
+  struct PfmCountersInfo {
+    // An optional name of a performance counter that can be used to measure
+    // cycles.
+    const char *CycleCounter;
+
+    // For each MCProcResourceDesc defined by the processor, an optional list of
+    // names of performance counters that can be used to measure the resource
+    // utilization.
+    const char **IssueCounters;
+  };
+  PfmCountersInfo PfmCounters;
+};
+
 /// Machine model for scheduling, bundling, and heuristics.
 ///
 /// The machine model directly provides basic information about the
@@ -138,9 +200,62 @@
 /// provides a detailed reservation table describing each cycle of instruction
 /// execution. Subtargets may define any or all of the above categories of data
 /// depending on the type of CPU and selected scheduler.
+///
+/// The machine independent properties defined here are used by the scheduler as
+/// an abstract machine model. A real micro-architecture has a number of
+/// buffers, queues, and stages. Declaring that a given machine-independent
+/// abstract property corresponds to a specific physical property across all
+/// subtargets can't be done. Nonetheless, the abstract model is
+/// useful. Futhermore, subtargets typically extend this model with processor
+/// specific resources to model any hardware features that can be exploited by
+/// sceduling heuristics and aren't sufficiently represented in the abstract.
+///
+/// The abstract pipeline is built around the notion of an "issue point". This
+/// is merely a reference point for counting machine cycles. The physical
+/// machine will have pipeline stages that delay execution. The scheduler does
+/// not model those delays because they are irrelevant as long as they are
+/// consistent. Inaccuracies arise when instructions have different execution
+/// delays relative to each other, in addition to their intrinsic latency. Those
+/// special cases can be handled by TableGen constructs such as, ReadAdvance,
+/// which reduces latency when reading data, and ResourceCycles, which consumes
+/// a processor resource when writing data for a number of abstract
+/// cycles.
+///
+/// TODO: One tool currently missing is the ability to add a delay to
+/// ResourceCycles. That would be easy to add and would likely cover all cases
+/// currently handled by the legacy itinerary tables.
+///
+/// A note on out-of-order execution and, more generally, instruction
+/// buffers. Part of the CPU pipeline is always in-order. The issue point, which
+/// is the point of reference for counting cycles, only makes sense as an
+/// in-order part of the pipeline. Other parts of the pipeline are sometimes
+/// falling behind and sometimes catching up. It's only interesting to model
+/// those other, decoupled parts of the pipeline if they may be predictably
+/// resource constrained in a way that the scheduler can exploit.
+///
+/// The LLVM machine model distinguishes between in-order constraints and
+/// out-of-order constraints so that the target's scheduling strategy can apply
+/// appropriate heuristics. For a well-balanced CPU pipeline, out-of-order
+/// resources would not typically be treated as a hard scheduling
+/// constraint. For example, in the GenericScheduler, a delay caused by limited
+/// out-of-order resources is not directly reflected in the number of cycles
+/// that the scheduler sees between issuing an instruction and its dependent
+/// instructions. In other words, out-of-order resources don't directly increase
+/// the latency between pairs of instructions. However, they can still be used
+/// to detect potential bottlenecks across a sequence of instructions and bias
+/// the scheduling heuristics appropriately.
 struct MCSchedModel {
   // IssueWidth is the maximum number of instructions that may be scheduled in
-  // the same per-cycle group.
+  // the same per-cycle group. This is meant to be a hard in-order constraint
+  // (a.k.a. "hazard"). In the GenericScheduler strategy, no more than
+  // IssueWidth micro-ops can ever be scheduled in a particular cycle.
+  //
+  // In practice, IssueWidth is useful to model any bottleneck between the
+  // decoder (after micro-op expansion) and the out-of-order reservation
+  // stations or the decoder bandwidth itself. If the total number of
+  // reservation stations is also a bottleneck, or if any other pipeline stage
+  // has a bandwidth limitation, then that can be naturally modeled by adding an
+  // out-of-order processor resource.
   unsigned IssueWidth;
   static const unsigned DefaultIssueWidth = 1;
 
@@ -198,11 +313,21 @@
   friend class InstrItineraryData;
   const InstrItinerary *InstrItineraries;
 
+  const MCExtraProcessorInfo *ExtraProcessorInfo;
+
+  bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; }
+
   unsigned getProcessorID() const { return ProcID; }
 
   /// Does this machine model include instruction-level scheduling.
   bool hasInstrSchedModel() const { return SchedClassTable; }
 
+  const MCExtraProcessorInfo &getExtraProcessorInfo() const {
+    assert(hasExtraProcessorInfo() &&
+           "No extra information available for this model");
+    return *ExtraProcessorInfo;
+  }
+
   /// Return true if this machine model data for all instructions with a
   /// scheduling class (itinerary class or SchedRW list).
   bool isComplete() const { return CompleteModel; }
@@ -232,16 +357,27 @@
   static int computeInstrLatency(const MCSubtargetInfo &STI,
                                  const MCSchedClassDesc &SCDesc);
 
-  /// Returns the reciprocal throughput information from a MCSchedClassDesc.
-  static Optional<double>
+  int computeInstrLatency(const MCSubtargetInfo &STI, unsigned SClass) const;
+  int computeInstrLatency(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
+                          const MCInst &Inst) const;
+
+  // Returns the reciprocal throughput information from a MCSchedClassDesc.
+  static double
   getReciprocalThroughput(const MCSubtargetInfo &STI,
                           const MCSchedClassDesc &SCDesc);
 
+  static double
+  getReciprocalThroughput(unsigned SchedClass, const InstrItineraryData &IID);
+
+  double
+  getReciprocalThroughput(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
+                          const MCInst &Inst) const;
+
   /// Returns the default initialized model.
   static const MCSchedModel &GetDefaultSchedModel() { return Default; }
   static const MCSchedModel Default;
 };
 
-} // End llvm namespace
+} // namespace llvm
 
 #endif
commit	cdfccccb1ea3e050b07dc59f87e214ee979da2ae	[log] [tgz]
author	Andrew Scull <ascull@google.com>	Fri Oct 05 20:58:37 2018 +0100
committer	Andrew Scull <ascull@google.com>	Fri Oct 05 21:44:43 2018 +0100
tree	819b945b890b8bfabe7e3c6e29c75e0dba88961d
parent	5e1ddfae19f7052e9e27b1bf2c2d8ea3c9bcf5b0 [diff] [blame]