//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"

namespace llvm {

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
enum ID : unsigned;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the intrinsic has a scalar operand. It checks for
/// ctlz, cttz and powi special intrinsics whose argument is scalar.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns the intrinsic ID for the call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no such mapping exists, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);
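
// Illustrative sketch (not part of the upstream header): one way a client
// might combine the two queries above to decide whether a call is a candidate
// for simple widening. The helper name is hypothetical.
inline bool isTriviallyVectorizableCall(const CallInst *CI,
                                        const TargetLibraryInfo *TLI) {
  // not_intrinsic is the zero value of Intrinsic::ID, so a non-zero result
  // means the call maps to a real intrinsic.
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
  return ID && isTriviallyVectorizable(ID);
}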

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it was inserted and then
/// extracted from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);
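
// Illustrative sketch (not part of the upstream header): findScalarElement is
// typically used to avoid emitting a redundant extractelement. The helper name
// is hypothetical; Builder is assumed to be positioned at the desired
// insertion point.
inline Value *getScalarElement(IRBuilder<> &Builder, Value *Vec,
                               unsigned EltNo) {
  // Reuse an existing scalar definition of the element if one is around...
  if (Value *Scalar = findScalarElement(Vec, EltNo))
    return Scalar;
  // ...otherwise extract it from the vector.
  return Builder.CreateExtractElement(Vec, Builder.getInt32(EltNo));
}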

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constant vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);
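
// Illustrative sketch (not part of the upstream header): a hypothetical
// convenience wrapper that asks whether a vector value is a broadcast of a
// particular scalar.
inline bool isSplatOf(const Value *Vec, const Value *Scalar) {
  const Value *Splat = getSplatValue(Vec);
  return Splat && Splat == Scalar;
}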

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However, InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these extensions and truncations when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
/// %1 = load i8, i8*
/// %2 = add i8 %1, 2
/// %3 = load i16, i16*
/// %4 = zext i8 %2 to i32
/// %5 = zext i16 %3 to i32
/// %6 = add i32 %4, %5
/// %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);
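
// Illustrative sketch (not part of the upstream header): a hypothetical helper
// showing how the result of computeMinimumValueSizes might be consulted, with
// a fallback width when an instruction is not present in the map.
inline uint64_t getMinimumBitWidth(
    const MapVector<Instruction *, uint64_t> &MinBWs, Instruction *I,
    uint64_t OriginalWidth) {
  auto It = MinBWs.find(I);
  return It == MinBWs.end() ? OriginalWidth : It->second;
}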

/// Propagate metadata from a group of source values \p VL to the new
/// instruction \p I.
///
/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal]. For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
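
// Illustrative sketch (not part of the upstream header): propagateMetadata
// takes the source list as Value pointers, so a caller holding Instructions
// typically widens them first. The helper name is hypothetical.
inline Instruction *propagateMetadataFrom(Instruction *NewInst,
                                          ArrayRef<Instruction *> Members) {
  // Instruction* converts implicitly to Value*, so the range constructor
  // builds the list propagateMetadata expects.
  SmallVector<Value *, 8> VL(Members.begin(), Members.end());
  return propagateMetadata(NewInst, VL);
}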

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
/// <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                               unsigned NumVecs);
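
// Illustrative sketch (not part of the upstream header): the interleave mask
// is normally fed straight into a shufflevector. The helper name is
// hypothetical and only handles the two-vector case.
inline Value *interleaveTwoVectors(IRBuilder<> &Builder, Value *V0, Value *V1,
                                   unsigned VF) {
  // Produces <V0[0], V1[0], V0[1], V1[1], ...> of length 2 * VF.
  Constant *Mask = createInterleaveMask(Builder, VF, /*NumVecs=*/2);
  return Builder.CreateShuffleVector(V0, V1, Mask, "interleaved.vec");
}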

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
/// <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);
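
// Illustrative sketch (not part of the upstream header): deinterleaving one
// member out of a wide vector with a stride mask. The helper name is
// hypothetical; the second shuffle operand is unused, so undef suffices.
inline Value *extractStridedVector(IRBuilder<> &Builder, Value *Wide,
                                   unsigned Start, unsigned Stride,
                                   unsigned VF) {
  Constant *Mask = createStrideMask(Builder, Start, Stride, VF);
  Value *Undef = UndefValue::get(Wide->getType());
  return Builder.CreateShuffleVector(Wide, Undef, Mask, "strided.vec");
}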

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and
/// begin at \p Start. The mask contains \p NumInts integers and is padded with
/// \p NumUndefs undef values. The mask is of the form:
///
/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
/// <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);
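
// Illustrative sketch (not part of the upstream header): a sequential mask
// with undef padding is one way a short vector can be widened before it is
// combined with longer ones. The helper name is hypothetical; NumElts is
// assumed to be the element count of \p V.
inline Value *widenWithUndef(IRBuilder<> &Builder, Value *V, unsigned NumElts,
                             unsigned NumPad) {
  // Keep the NumElts original lanes and append NumPad undef lanes.
  Constant *Mask = createSequentialMask(Builder, 0, NumElts, NumPad);
  Value *Undef = UndefValue::get(V->getType());
  return Builder.CreateShuffleVector(V, Undef, Mask, "widened.vec");
}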

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into
/// a single large vector. The number of vectors should be greater than one,
/// and their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];                           // Member of index 0
///          b = A[i+1];                         // Member of index 1
///          d = A[i+3];                         // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;                         // Member of index 0
///          A[i+1] = b;                         // Member of index 1
///          A[i+2] = c;                         // Member of index 2
///          A[i+3] = d;                         // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
class InterleaveGroup {
public:
  InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
      : Align(Align), InsertPos(Instr) {
    assert(Align && "The alignment should be non-zero");

    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  unsigned getFactor() const { return Factor; }
  unsigned getAlignment() const { return Align; }
  unsigned getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is relative to the leader and may be
  /// negative if the new member becomes the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
    assert(NewAlign && "The new member's alignment should be non-zero");

    int Key = Index + SmallestKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {
      // The distance between the largest and smallest keys must stay below
      // the interleave factor.
      if (LargestKey - Key >= static_cast<int>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Align = std::min(Align, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if the group contains no such member.
  Instruction *getMember(unsigned Index) const {
    int Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  unsigned getIndex(Instruction *Instr) const {
    for (auto I : Members)
      if (I.second == Instr)
        return I.first - SmallestKey;

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  Instruction *getInsertPos() const { return InsertPos; }
  void setInsertPos(Instruction *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(Instruction *NewInst) const {
    SmallVector<Value *, 4> VL;
    std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                   [](std::pair<int, Instruction *> p) { return p.second; });
    propagateMetadata(NewInst, VL);
  }

private:
  unsigned Factor; // Interleave Factor.
  bool Reverse;
  unsigned Align;
  DenseMap<int, Instruction *> Members;
  int SmallestKey = 0;
  int LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32        // Insert Position
  //      %add = add i32 %even    // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32          // Def of %odd
  //      store i32 %odd          // Insert Position
  Instruction *InsertPos;
};
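
// Illustrative sketch (not part of the upstream header): how an interleave
// group of factor 2 could be assembled from two adjacent accesses. The helper
// name is hypothetical; in-tree, groups are created and owned by
// InterleavedAccessInfo below.
inline InterleaveGroup *buildPairGroup(Instruction *Leader,
                                       Instruction *Second, unsigned Align) {
  // Stride 2 gives an interleave factor of 2; the leader takes index 0.
  auto *Group = new InterleaveGroup(Leader, /*Stride=*/2, Align);
  // The second access sits one element after the leader (index 1).
  if (!Group->insertMember(Second, /*Index=*/1, Align)) {
    delete Group;
    return nullptr;
  }
  return Group;
}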

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise the analysis is meaningless, since vectorizing
/// interleaved accesses would be unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Symbolic strides are substituted using the stride information
  /// collected from the loop.
  void analyzeInterleaving();

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
    auto Group = InterleaveGroupMap.find(Instr);
    if (Group == InterleaveGroupMap.end())
      return nullptr;
    return Group->second;
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     unsigned Align)
        : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    unsigned Align = 0;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
                                         unsigned Align) {
    assert(!isInterleaved(Instr) && "Already in an interleaved access group");
    InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false only when
  /// reordering might be unsafe, i.e. when at least one of the accesses is
  /// strided and \p A may be the source of a dependence to \p B.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};
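
// Illustrative sketch (not part of the upstream header): a typical query
// pattern against the analysis once analyzeInterleaving() has run. The helper
// name is hypothetical.
inline InterleaveGroup *findInterleaveGroup(const InterleavedAccessInfo &IAI,
                                            Instruction *Instr) {
  // getInterleaveGroup already returns nullptr for non-members; the explicit
  // isInterleaved check just makes the intent obvious.
  return IAI.isInterleaved(Instr) ? IAI.getInterleaveGroup(Instr) : nullptr;
}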

} // llvm namespace

#endif