//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"

namespace llvm {

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
enum ID : unsigned;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the intrinsic has a scalar operand. It checks for the
/// ctlz, cttz, and powi special intrinsics, whose argument at \p ScalarOpdIdx
/// remains scalar.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
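
// For illustration: in the vector form of llvm.powi the i32 exponent stays
// scalar, so the following query (a usage sketch, not part of this header)
// returns true:
//
//   bool ExpIsScalar = hasVectorInstrinsicScalarOpd(Intrinsic::powi, 1);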

/// Returns intrinsic ID for call.
/// For the given call instruction, this finds the corresponding intrinsic and
/// returns its intrinsic ID; if none is found, it returns
/// Intrinsic::not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);
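
// A usage sketch (illustrative only; 'CI' and 'TLI' come from the caller):
//
//   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
//   if (ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID)) {
//     // The call can be widened by emitting the vector form of the intrinsic.
//   }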

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it was inserted and then
/// extracted from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);
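
// For illustration (a sketch, not a specification): given IR such as
//
//   %v = insertelement <4 x i32> %w, i32 %x, i32 1
//
// findScalarElement(%v, 1) can return %x directly instead of emitting an
// extractelement.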

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constant vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);
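
// A typical broadcast sequence this can recognize looks like the following
// IR (illustrative):
//
//   %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
//                          <4 x i32> zeroinitializer
//
// for which getSplatValue returns %x.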

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these extensions and truncations when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);
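
// A minimal usage sketch (illustrative; 'L', 'DB', and 'TTI' are assumed to be
// provided by the surrounding pass):
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(L->getBlocks(), DB, &TTI);
//   for (const auto &KV : MinBWs)
//     dbgs() << *KV.first << " needs only i" << KV.second << "\n";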

/// Propagate metadata from the instructions in \p VL to the new instruction
/// \p I.
///
/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal]. For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
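
// Typical use when a group of scalar accesses is widened into one vector
// access (a sketch; 'WideLoad' and the scalar loads are hypothetical):
//
//   SmallVector<Value *, 4> Scalars = {Load0, Load1, Load2, Load3};
//   propagateMetadata(WideLoad, Scalars);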

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                               unsigned NumVecs);
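
// A usage sketch (illustrative; 'V0' and 'V1' are hypothetical <4 x i32>
// values and 'Builder' is positioned at the insertion point):
//
//   Value *Interleaved = Builder.CreateShuffleVector(
//       V0, V1, createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2));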

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);
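
// A usage sketch extracting the even lanes of a hypothetical <8 x i32> value
// 'Wide' (illustrative only):
//
//   Value *Even = Builder.CreateShuffleVector(
//       Wide, UndefValue::get(Wide->getType()),
//       createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4));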

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
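
// A usage sketch (illustrative; 'Parts' holds vector values of the same
// element type defined earlier by the caller):
//
//   SmallVector<Value *, 4> Parts = {V0, V1, V2, V3};
//   Value *Wide = concatenateVectors(Builder, Parts);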

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];                           // Member of index 0
///          b = A[i+1];                         // Member of index 1
///          d = A[i+3];                         // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;                         // Member of index 0
///          A[i+1] = b;                         // Member of index 1
///          A[i+2] = c;                         // Member of index 2
///          A[i+3] = d;                         // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
class InterleaveGroup {
public:
  InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
      : Align(Align), InsertPos(Instr) {
    assert(Align && "The alignment should be non-zero");

    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  unsigned getFactor() const { return Factor; }
  unsigned getAlignment() const { return Align; }
  unsigned getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is relative to the leader and may be
  /// negative if \p Instr becomes the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
    assert(NewAlign && "The new member's alignment should be non-zero");

    int Key = Index + SmallestKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {
      // The range between the largest and smallest keys must stay within the
      // interleave factor.
      if (LargestKey - Key >= static_cast<int>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Align = std::min(Align, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if the group contains no such member.
  Instruction *getMember(unsigned Index) const {
    int Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  unsigned getIndex(Instruction *Instr) const {
    for (auto I : Members)
      if (I.second == Instr)
        return I.first - SmallestKey;

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  Instruction *getInsertPos() const { return InsertPos; }
  void setInsertPos(Instruction *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(Instruction *NewInst) const {
    SmallVector<Value *, 4> VL;
    std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                   [](std::pair<int, Instruction *> p) { return p.second; });
    propagateMetadata(NewInst, VL);
  }

private:
  unsigned Factor; // Interleave Factor.
  bool Reverse;
  unsigned Align;
  DenseMap<int, Instruction *> Members;
  int SmallestKey = 0;
  int LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32  // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32                // Def of %odd
  //      store i32 %odd                // Insert Position
  Instruction *InsertPos;
};
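
// A minimal sketch of how a group is typically populated and queried
// (illustrative; 'Load0' and 'Load2' stand for strided loads with the same
// base and a stride of 4):
//
//   InterleaveGroup Group(Load0, /*Stride=*/4, /*Align=*/4);
//   Group.insertMember(Load2, /*Index=*/2, /*NewAlign=*/4); // gap at index 1
//   Instruction *Leader = Group.getMember(0);               // == Load0
//   assert(Group.getFactor() == 4 && !Group.isReverse());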

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise there is no point in doing the analysis, since it would
/// not be safe to vectorize the interleaved accesses.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Symbolic strides are substituted using the symbolic stride
  /// information collected for the loop.
  void analyzeInterleaving();

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
    auto Group = InterleaveGroupMap.find(Instr);
    if (Group == InterleaveGroupMap.end())
      return nullptr;
    return Group->second;
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
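
  // A query pattern a vectorizer client might use (illustrative; 'IAI' is an
  // InterleavedAccessInfo instance and 'I' is a load or store in the loop):
  //
  //   IAI.analyzeInterleaving();
  //   if (InterleaveGroup *Group = IAI.getInterleaveGroup(I)) {
  //     unsigned Factor = Group->getFactor(); // e.g. 4 for the examples above
  //     bool NeedsEpilogue = IAI.requiresScalarEpilogue();
  //   }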

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     unsigned Align)
        : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    unsigned Align = 0;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
                                         unsigned Align) {
    assert(!isInterleaved(Instr) && "Already in an interleaved access group");
    InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return true when reordering
  /// is unnecessary or is known to be safe, and false when \p A and \p B may
  /// be dependent (or dependence information is unavailable) and thus cannot
  /// safely be reordered.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif