blob: e8f2205545ebed3b5b75327fe3a846dbcb1711fc [file] [log] [blame]
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the interface for the loop cache analysis.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16
17#include "llvm/Analysis/LoopAnalysisManager.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/IR/PassManager.h"
20#include "llvm/Support/raw_ostream.h"
21
22namespace llvm {
23
24class AAResults;
25class DependenceInfo;
26class LPMUpdater;
27class ScalarEvolution;
28class SCEV;
29class TargetTransformInfo;
30
31using CacheCostTy = int64_t;
32using LoopVectorTy = SmallVector<Loop *, 8>;
33
34/// Represents a memory reference as a base pointer and a set of indexing
35/// operations. For example given the array reference A[i][2j+1][3k+2] in a
36/// 3-dim loop nest:
37/// for(i=0;i<n;++i)
38/// for(j=0;j<m;++j)
39/// for(k=0;k<o;++k)
40/// ... A[i][2j+1][3k+2] ...
41/// We expect:
42/// BasePointer -> A
43/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
44/// Sizes -> [m][o][4]
45class IndexedReference {
46 friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
47
48public:
49 /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
50 IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
51 ScalarEvolution &SE);
52
53 bool isValid() const { return IsValid; }
54 const SCEV *getBasePointer() const { return BasePointer; }
55 size_t getNumSubscripts() const { return Subscripts.size(); }
56 const SCEV *getSubscript(unsigned SubNum) const {
57 assert(SubNum < getNumSubscripts() && "Invalid subscript number");
58 return Subscripts[SubNum];
59 }
60 const SCEV *getFirstSubscript() const {
61 assert(!Subscripts.empty() && "Expecting non-empty container");
62 return Subscripts.front();
63 }
64 const SCEV *getLastSubscript() const {
65 assert(!Subscripts.empty() && "Expecting non-empty container");
66 return Subscripts.back();
67 }
68
69 /// Return true/false if the current object and the indexed reference \p Other
70 /// are/aren't in the same cache line of size \p CLS. Two references are in
71 /// the same chace line iff the distance between them in the innermost
72 /// dimension is less than the cache line size. Return None if unsure.
73 Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
74 AAResults &AA) const;
75
76 /// Return true if the current object and the indexed reference \p Other
77 /// have distance smaller than \p MaxDistance in the dimension associated with
78 /// the given loop \p L. Return false if the distance is not smaller than \p
79 /// MaxDistance and None if unsure.
80 Optional<bool> hasTemporalReuse(const IndexedReference &Other,
81 unsigned MaxDistance, const Loop &L,
82 DependenceInfo &DI, AAResults &AA) const;
83
84 /// Compute the cost of the reference w.r.t. the given loop \p L when it is
85 /// considered in the innermost position in the loop nest.
86 /// The cost is defined as:
87 /// - equal to one if the reference is loop invariant, or
88 /// - equal to '(TripCount * stride) / cache_line_size' if:
89 /// + the reference stride is less than the cache line size, and
90 /// + the coefficient of this loop's index variable used in all other
91 /// subscripts is zero
92 /// - or otherwise equal to 'TripCount'.
93 CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
94
95private:
96 /// Attempt to delinearize the indexed reference.
97 bool delinearize(const LoopInfo &LI);
98
99 /// Return true if the index reference is invariant with respect to loop \p L.
100 bool isLoopInvariant(const Loop &L) const;
101
102 /// Return true if the indexed reference is 'consecutive' in loop \p L.
103 /// An indexed reference is 'consecutive' if the only coefficient that uses
104 /// the loop induction variable is the rightmost one, and the access stride is
105 /// smaller than the cache line size \p CLS.
106 bool isConsecutive(const Loop &L, unsigned CLS) const;
107
108 /// Return the coefficient used in the rightmost dimension.
109 const SCEV *getLastCoefficient() const;
110
111 /// Return true if the coefficient corresponding to induction variable of
112 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
113 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
114 const Loop &L) const;
115
116 /// Verify that the given \p Subscript is 'well formed' (must be a simple add
117 /// recurrence).
118 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
119
120 /// Return true if the given reference \p Other is definetely aliased with
121 /// the indexed reference represented by this class.
122 bool isAliased(const IndexedReference &Other, AAResults &AA) const;
123
124private:
125 /// True if the reference can be delinearized, false otherwise.
126 bool IsValid = false;
127
128 /// Represent the memory reference instruction.
129 Instruction &StoreOrLoadInst;
130
131 /// The base pointer of the memory reference.
132 const SCEV *BasePointer = nullptr;
133
134 /// The subscript (indexes) of the memory reference.
135 SmallVector<const SCEV *, 3> Subscripts;
136
137 /// The dimensions of the memory reference.
138 SmallVector<const SCEV *, 3> Sizes;
139
140 ScalarEvolution &SE;
141};
142
143/// A reference group represents a set of memory references that exhibit
144/// temporal or spacial reuse. Two references belong to the same
145/// reference group with respect to a inner loop L iff:
146/// 1. they have a loop independent dependency, or
147/// 2. they have a loop carried dependence with a small dependence distance
148/// (e.g. less than 2) carried by the inner loop, or
149/// 3. they refer to the same array, and the subscript in their innermost
150/// dimension is less than or equal to 'd' (where 'd' is less than the cache
151/// line size)
152///
153/// Intuitively a reference group represents memory references that access
154/// the same cache line. Conditions 1,2 above account for temporal reuse, while
155/// contition 3 accounts for spacial reuse.
156using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>;
157using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>;
158
159/// \c CacheCost represents the estimated cost of a inner loop as the number of
160/// cache lines used by the memory references it contains.
161/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
162/// the cache costs of all of its reference groups when the loop is considered
163/// to be in the innermost position in the nest.
164/// A reference group represents memory references that fall into the same cache
165/// line. Each reference group is analysed with respect to the innermost loop in
166/// a loop nest. The cost of a reference is defined as follow:
167/// - one if it is loop invariant w.r.t the innermost loop,
168/// - equal to the loop trip count divided by the cache line times the
169/// reference stride if the reference stride is less than the cache line
170/// size (CLS), and the coefficient of this loop's index variable used in all
171/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
172/// - equal to the innermost loop trip count if the reference stride is greater
173/// or equal to the cache line size CLS.
174class CacheCost {
175 friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
176 using LoopTripCountTy = std::pair<const Loop *, unsigned>;
177 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
178
179public:
180 static CacheCostTy constexpr InvalidCost = -1;
181
182 /// Construct a CacheCost object for the loop nest described by \p Loops.
183 /// The optional parameter \p TRT can be used to specify the max. distance
184 /// between array elements accessed in a loop so that the elements are
185 /// classified to have temporal reuse.
186 CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
187 TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI,
188 Optional<unsigned> TRT = None);
189
190 /// Create a CacheCost for the loop nest rooted by \p Root.
191 /// The optional parameter \p TRT can be used to specify the max. distance
192 /// between array elements accessed in a loop so that the elements are
193 /// classified to have temporal reuse.
194 static std::unique_ptr<CacheCost>
195 getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI,
196 Optional<unsigned> TRT = None);
197
198 /// Return the estimated cost of loop \p L if the given loop is part of the
199 /// loop nest associated with this object. Return -1 otherwise.
200 CacheCostTy getLoopCost(const Loop &L) const {
201 auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
202 return LCC.first == &L;
203 });
204 return (IT != LoopCosts.end()) ? (*IT).second : -1;
205 }
206
207 /// Return the estimated ordered loop costs.
208 const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
209
210private:
211 /// Calculate the cache footprint of each loop in the nest (when it is
212 /// considered to be in the innermost position).
213 void calculateCacheFootprint();
214
215 /// Partition store/load instructions in the loop nest into reference groups.
216 /// Two or more memory accesses belong in the same reference group if they
217 /// share the same cache line.
218 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
219
220 /// Calculate the cost of the given loop \p L assuming it is the innermost
221 /// loop in nest.
222 CacheCostTy computeLoopCacheCost(const Loop &L,
223 const ReferenceGroupsTy &RefGroups) const;
224
225 /// Compute the cost of a representative reference in reference group \p RG
226 /// when the given loop \p L is considered as the innermost loop in the nest.
227 /// The computed cost is an estimate for the number of cache lines used by the
228 /// reference group. The representative reference cost is defined as:
229 /// - equal to one if the reference is loop invariant, or
230 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
231 /// induction variable is used only in the reference subscript associated
232 /// with loop \p L, and (b) the reference stride is less than the cache
233 /// line size, or
234 /// - TripCount otherwise
235 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
236 const Loop &L) const;
237
238 /// Sort the LoopCosts vector by decreasing cache cost.
239 void sortLoopCosts() {
240 sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
241 return A.second > B.second;
242 });
243 }
244
245private:
246 /// Loops in the loop nest associated with this object.
247 LoopVectorTy Loops;
248
249 /// Trip counts for the loops in the loop nest associated with this object.
250 SmallVector<LoopTripCountTy, 3> TripCounts;
251
252 /// Cache costs for the loops in the loop nest associated with this object.
253 SmallVector<LoopCacheCostTy, 3> LoopCosts;
254
255 /// The max. distance between array elements accessed in a loop so that the
256 /// elements are classified to have temporal reuse.
257 Optional<unsigned> TRT;
258
259 const LoopInfo &LI;
260 ScalarEvolution &SE;
261 TargetTransformInfo &TTI;
262 AAResults &AA;
263 DependenceInfo &DI;
264};
265
266raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
267raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
268
269/// Printer pass for the \c CacheCost results.
270class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
271 raw_ostream &OS;
272
273public:
274 explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
275
276 PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
277 LoopStandardAnalysisResults &AR, LPMUpdater &U);
278};
279
280} // namespace llvm
281
282#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H