Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame^] | 1 | //===-- llvm/MC/MCSchedule.h - Scheduling -----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file defines the classes used to describe a subtarget's machine model |
| 11 | // for scheduling and other instruction cost heuristics. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #ifndef LLVM_MC_MCSCHEDULE_H |
| 16 | #define LLVM_MC_MCSCHEDULE_H |
| 17 | |
| 18 | #include "llvm/ADT/Optional.h" |
| 19 | #include "llvm/Support/DataTypes.h" |
| 20 | #include <cassert> |
| 21 | |
| 22 | namespace llvm { |
| 23 | |
// Forward declarations. This header only refers to these types through
// pointers and references, so their full definitions are not required here.
struct InstrItinerary;
class MCSubtargetInfo;
| 26 | |
/// Define a kind of processor resource that will be modeled by the scheduler.
struct MCProcResourceDesc {
  const char *Name;
  unsigned NumUnits; // Number of resources of this kind.
  unsigned SuperIdx; // Index of the resource kind that contains this kind.

  // Number of resources that may be buffered.
  //
  // Buffered resources (BufferSize != 0) may be consumed at some indeterminate
  // cycle after dispatch. This should be used for out-of-order cpus when
  // instructions that use this resource can be buffered in a reservation
  // station.
  //
  // Unbuffered resources (BufferSize == 0) always consume their resource some
  // fixed number of cycles after dispatch. If a resource is unbuffered, then
  // the scheduler will avoid scheduling instructions with conflicting resources
  // in the same cycle. This is for in-order cpus, or the in-order portion of
  // an out-of-order cpu.
  int BufferSize;

  // If the resource has sub-units, a pointer to the first element of an array
  // of `NumUnits` elements containing the ProcResourceIdx of the sub units.
  // nullptr if the resource does not have sub-units.
  const unsigned *SubUnitsIdxBegin;

  /// Two descriptors compare equal when NumUnits, SuperIdx and BufferSize all
  /// match. Name and SubUnitsIdxBegin do not take part in the comparison.
  bool operator==(const MCProcResourceDesc &Other) const {
    return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx &&
           BufferSize == Other.BufferSize;
  }
};
| 57 | |
/// Identify one of the processor resource kinds consumed by a particular
/// scheduling class for the specified number of cycles.
struct MCWriteProcResEntry {
  uint16_t ProcResourceIdx; // Index of the consumed processor resource kind.
  uint16_t Cycles;          // Number of cycles the resource is consumed for.

  /// Entries are equal when both the resource index and the cycle count match.
  bool operator==(const MCWriteProcResEntry &Other) const {
    return ProcResourceIdx == Other.ProcResourceIdx && Cycles == Other.Cycles;
  }
};
| 68 | |
/// Specify the latency in cpu cycles for a particular scheduling class and def
/// index. -1 indicates an invalid latency. Heuristics would typically consider
/// an instruction with invalid latency to have infinite latency. Also identify
/// the WriteResources of this def. When the operand expands to a sequence of
/// writes, this ID is the last write in the sequence.
struct MCWriteLatencyEntry {
  int16_t Cycles;           // Latency in cycles; -1 marks an invalid latency.
  uint16_t WriteResourceID; // ID of the (last) write resource of this def.

  /// Entries are equal when both the latency and the write resource ID match.
  bool operator==(const MCWriteLatencyEntry &Other) const {
    return Cycles == Other.Cycles && WriteResourceID == Other.WriteResourceID;
  }
};
| 82 | |
/// Specify the number of cycles allowed after instruction issue before a
/// particular use operand reads its registers. This effectively reduces the
/// write's latency. Here we allow negative cycles for corner cases where
/// latency increases. This rule only applies when the entry's WriteResource
/// matches the write's WriteResource.
///
/// MCReadAdvanceEntries are sorted first by operand index (UseIdx), then by
/// WriteResourceID.
struct MCReadAdvanceEntry {
  unsigned UseIdx;          // Index of the use operand this entry applies to.
  unsigned WriteResourceID; // Write resource the advance is tied to.
  int Cycles;               // Cycles of advance; may be negative.

  /// Entries are equal when all three fields match.
  bool operator==(const MCReadAdvanceEntry &Other) const {
    return UseIdx == Other.UseIdx &&
           WriteResourceID == Other.WriteResourceID && Cycles == Other.Cycles;
  }
};
| 101 | |
/// Summarize the scheduling resources required for an instruction of a
/// particular scheduling class.
///
/// Defined as an aggregate struct for creating tables with initializer lists.
struct MCSchedClassDesc {
  // Sentinel NumMicroOps values. InvalidNumMicroOps is the largest value that
  // fits in the 14-bit NumMicroOps field; VariantNumMicroOps is one below it.
  static const unsigned short InvalidNumMicroOps = (1U << 14) - 1;
  static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1;

  // The name is only present in builds with assertions or dumping enabled, so
  // the struct layout differs between those configurations.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  const char *Name;
#endif
  uint16_t NumMicroOps : 14; // Micro-op count, or one of the sentinels above.
  bool BeginGroup : 1;
  bool EndGroup : 1;
  uint16_t WriteProcResIdx; // First index into WriteProcResTable.
  uint16_t NumWriteProcResEntries;
  uint16_t WriteLatencyIdx; // First index into WriteLatencyTable.
  uint16_t NumWriteLatencyEntries;
  uint16_t ReadAdvanceIdx; // First index into ReadAdvanceTable.
  uint16_t NumReadAdvanceEntries;

  /// Return true unless NumMicroOps holds the InvalidNumMicroOps sentinel.
  bool isValid() const { return NumMicroOps != InvalidNumMicroOps; }

  /// Return true when NumMicroOps holds the VariantNumMicroOps sentinel.
  bool isVariant() const { return NumMicroOps == VariantNumMicroOps; }
};
| 130 | |
| 131 | /// Machine model for scheduling, bundling, and heuristics. |
| 132 | /// |
| 133 | /// The machine model directly provides basic information about the |
| 134 | /// microarchitecture to the scheduler in the form of properties. It also |
| 135 | /// optionally refers to scheduler resource tables and itinerary |
| 136 | /// tables. Scheduler resource tables model the latency and cost for each |
| 137 | /// instruction type. Itinerary tables are an independent mechanism that |
| 138 | /// provides a detailed reservation table describing each cycle of instruction |
| 139 | /// execution. Subtargets may define any or all of the above categories of data |
| 140 | /// depending on the type of CPU and selected scheduler. |
| 141 | struct MCSchedModel { |
| 142 | // IssueWidth is the maximum number of instructions that may be scheduled in |
| 143 | // the same per-cycle group. |
| 144 | unsigned IssueWidth; |
| 145 | static const unsigned DefaultIssueWidth = 1; |
| 146 | |
| 147 | // MicroOpBufferSize is the number of micro-ops that the processor may buffer |
| 148 | // for out-of-order execution. |
| 149 | // |
| 150 | // "0" means operations that are not ready in this cycle are not considered |
| 151 | // for scheduling (they go in the pending queue). Latency is paramount. This |
| 152 | // may be more efficient if many instructions are pending in a schedule. |
| 153 | // |
| 154 | // "1" means all instructions are considered for scheduling regardless of |
| 155 | // whether they are ready in this cycle. Latency still causes issue stalls, |
| 156 | // but we balance those stalls against other heuristics. |
| 157 | // |
| 158 | // "> 1" means the processor is out-of-order. This is a machine independent |
| 159 | // estimate of highly machine specific characteristics such as the register |
| 160 | // renaming pool and reorder buffer. |
| 161 | unsigned MicroOpBufferSize; |
| 162 | static const unsigned DefaultMicroOpBufferSize = 0; |
| 163 | |
| 164 | // LoopMicroOpBufferSize is the number of micro-ops that the processor may |
| 165 | // buffer for optimized loop execution. More generally, this represents the |
| 166 | // optimal number of micro-ops in a loop body. A loop may be partially |
| 167 | // unrolled to bring the count of micro-ops in the loop body closer to this |
| 168 | // number. |
| 169 | unsigned LoopMicroOpBufferSize; |
| 170 | static const unsigned DefaultLoopMicroOpBufferSize = 0; |
| 171 | |
| 172 | // LoadLatency is the expected latency of load instructions. |
| 173 | unsigned LoadLatency; |
| 174 | static const unsigned DefaultLoadLatency = 4; |
| 175 | |
| 176 | // HighLatency is the expected latency of "very high latency" operations. |
| 177 | // See TargetInstrInfo::isHighLatencyDef(). |
| 178 | // By default, this is set to an arbitrarily high number of cycles |
| 179 | // likely to have some impact on scheduling heuristics. |
| 180 | unsigned HighLatency; |
| 181 | static const unsigned DefaultHighLatency = 10; |
| 182 | |
| 183 | // MispredictPenalty is the typical number of extra cycles the processor |
| 184 | // takes to recover from a branch misprediction. |
| 185 | unsigned MispredictPenalty; |
| 186 | static const unsigned DefaultMispredictPenalty = 10; |
| 187 | |
| 188 | bool PostRAScheduler; // default value is false |
| 189 | |
| 190 | bool CompleteModel; |
| 191 | |
| 192 | unsigned ProcID; |
| 193 | const MCProcResourceDesc *ProcResourceTable; |
| 194 | const MCSchedClassDesc *SchedClassTable; |
| 195 | unsigned NumProcResourceKinds; |
| 196 | unsigned NumSchedClasses; |
| 197 | // Instruction itinerary tables used by InstrItineraryData. |
| 198 | friend class InstrItineraryData; |
| 199 | const InstrItinerary *InstrItineraries; |
| 200 | |
| 201 | unsigned getProcessorID() const { return ProcID; } |
| 202 | |
| 203 | /// Does this machine model include instruction-level scheduling. |
| 204 | bool hasInstrSchedModel() const { return SchedClassTable; } |
| 205 | |
| 206 | /// Return true if this machine model data for all instructions with a |
| 207 | /// scheduling class (itinerary class or SchedRW list). |
| 208 | bool isComplete() const { return CompleteModel; } |
| 209 | |
| 210 | /// Return true if machine supports out of order execution. |
| 211 | bool isOutOfOrder() const { return MicroOpBufferSize > 1; } |
| 212 | |
| 213 | unsigned getNumProcResourceKinds() const { |
| 214 | return NumProcResourceKinds; |
| 215 | } |
| 216 | |
| 217 | const MCProcResourceDesc *getProcResource(unsigned ProcResourceIdx) const { |
| 218 | assert(hasInstrSchedModel() && "No scheduling machine model"); |
| 219 | |
| 220 | assert(ProcResourceIdx < NumProcResourceKinds && "bad proc resource idx"); |
| 221 | return &ProcResourceTable[ProcResourceIdx]; |
| 222 | } |
| 223 | |
| 224 | const MCSchedClassDesc *getSchedClassDesc(unsigned SchedClassIdx) const { |
| 225 | assert(hasInstrSchedModel() && "No scheduling machine model"); |
| 226 | |
| 227 | assert(SchedClassIdx < NumSchedClasses && "bad scheduling class idx"); |
| 228 | return &SchedClassTable[SchedClassIdx]; |
| 229 | } |
| 230 | |
| 231 | /// Returns the latency value for the scheduling class. |
| 232 | static int computeInstrLatency(const MCSubtargetInfo &STI, |
| 233 | const MCSchedClassDesc &SCDesc); |
| 234 | |
| 235 | /// Returns the reciprocal throughput information from a MCSchedClassDesc. |
| 236 | static Optional<double> |
| 237 | getReciprocalThroughput(const MCSubtargetInfo &STI, |
| 238 | const MCSchedClassDesc &SCDesc); |
| 239 | |
| 240 | /// Returns the default initialized model. |
| 241 | static const MCSchedModel &GetDefaultSchedModel() { return Default; } |
| 242 | static const MCSchedModel Default; |
| 243 | }; |
| 244 | |
| 245 | } // End llvm namespace |
| 246 | |
| 247 | #endif |