//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  unsigned VF;     // Vectorization factor.
  bool IsScalable; // True if the function is a scalable function.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, IsScalable, Parameters) ==
           std::tie(Other.VF, Other.IsScalable, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, ElementCount::getFixed(1),
                        /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
  }
  /// Sanity check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};
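
// A minimal construction sketch (illustrative only, not part of the
// interface): `CI` is assumed to be a CallInst with at least two arguments.
//
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   // Refine the second parameter to be uniform across lanes.
//   VFParameter Uniform = {/*ParamPos=*/1, VFParamKind::OMP_Uniform};
//   Shape.updateParam(Uniform);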

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.

  // Comparison operator.
  bool operator==(const VFInfo &Other) const {
    return std::tie(Shape, ScalarName, VectorName, ISA) ==
           std::tie(Other.Shape, Other.ScalarName, Other.VectorName, Other.ISA);
  }
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for a vector function that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
///   <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
///   _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
///   https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
/// function that cannot be retrieved from the mangled
/// name. At the moment, this parameter is needed only to retrieve the
/// Vectorization Factor of scalable vector functions from their
/// respective IR declarations.
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);
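
// Demangling sketch (illustrative; the mangled name and the symbols it
// refers to are hypothetical):
//
//   // 2-lane, unpredicated, one vector parameter, redirecting the scalar
//   // "sin" to "vector_sin".
//   Optional<VFInfo> Info =
//       VFABI::tryDemangleForVFABI("_ZGV_LLVM_N2v_sin(vector_sin)", M);
//   if (Info)
//     assert(Info->Shape.VF == 2 && Info->VectorName == "vector_sin");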

/// This routine mangles the given VectorName according to the LangRef
/// specification for vector-function-abi-variant attribute and is specific to
/// the TLI mappings. It is the responsibility of the caller to make sure that
/// this is only used if all parameters in the vector function are vector type.
/// This returned string holds scalar-to-vector mapping:
///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
///          field of the `VecDesc` struct.
/// <vparams> = "v", repeated numArgs times.
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, unsigned VF);
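
// For instance (an illustrative sketch; "vector_sin" is a hypothetical TLI
// mapping of "sin" with one argument and VF = 4), the components above
// compose as follows:
//
//   std::string Mangled =
//       VFABI::mangleTLIVectorName("vector_sin", "sin", /*numArgs=*/1,
//                                  /*VF=*/4);
//   // Mangled == "_ZGV_LLVM_N4v_sin(vector_sin)"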

/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector functions descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rule of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const Optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(Shape.getValue());
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
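
// Query sketch (illustrative): look up a 2-lane, unpredicated variant of the
// callee of `CI`, falling back to the scalar callee if no such mapping
// exists.
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(2),
//                                /*HasGlobalPred=*/false);
//   Function *VecFn = DB.getVectorizedFunction(Shape);
//   if (!VecFn)
//     VecFn = DB.getVectorizedFunction(VFShape::getScalarShape(CI));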

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}
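
// For example (illustrative; `Ctx` is assumed to be an LLVMContext):
//
//   Type *I32 = Type::getInt32Ty(Ctx);
//   Type *V4I32 = ToVectorTy(I32, 4);                        // <4 x i32>
//   Type *Same = ToVectorTy(I32, ElementCount::getFixed(1)); // i32 (scalar)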

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if it is not found, returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);
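
// Illustrative sketch of the convention above:
//
//   int Idx = getSplatIndex({0, 0, 0, 0});  // Idx == 0 (splat of lane 0)
//   int Mix = getSplatIndex({0, 1, -1, 0}); // Mix == -1 (not a splat)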

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);
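
// A round-trip sketch for the two helpers above, reusing the documented
// example values (illustrative only):
//
//   SmallVector<int, 16> Narrow;
//   narrowShuffleMaskElts(4, {3, 2, 0, -1}, Narrow);
//   // Narrow == {12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1}
//
//   SmallVector<int, 4> Wide;
//   bool Ok = widenShuffleMaskElts(4, Narrow, Wide);
//   // Ok == true, Wide == {3, 2, 0, -1}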

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for M equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
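
// Sample results for the mask helpers above, taken from the documented
// examples (illustrative; undef lanes are assumed to be encoded as -1 in the
// returned mask):
//
//   createReplicatedMask(3, 4);    // {0,0,0,1,1,1,2,2,2,3,3,3}
//   createInterleaveMask(4, 2);    // {0,4,1,5,2,6,3,7}
//   createStrideMask(0, 2, 4);     // {0,2,4,6}
//   createSequentialMask(0, 4, 4); // {0,1,2,3,-1,-1,-1,-1}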

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than interleaved factor, which is equal to the absolute
/// value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];     // Member of index 0
///          b = A[i+1];   // Member of index 1
///          d = A[i+3];   // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;   // Member of index 0
///          A[i+1] = b;   // Member of index 1
///          A[i+2] = c;   // Member of index 2
///          A[i+3] = d;   // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  LLVM_ATTRIBUTE_DEPRECATED(uint32_t getAlignment() const,
                            "Use getAlign instead.") {
    return Alignment.value();
  }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if it contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMedata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last-member
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32        // Insert Position
  //      %add = add i32 %even    // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32          // Def of %odd
  //      store i32 %odd          // Insert Position
  InstTy *InsertPos;
};

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if it doesn't have such a group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};
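
// Typical driver sketch (illustrative; PSE, L, DT, LI and LAI are assumed to
// come from the surrounding loop analyses):
//
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   for (auto *Group : IAI.getInterleaveGroups())
//     if (Group->requiresScalarEpilogue())
//       ; // e.g. plan a scalar epilogue iteration for the gaps.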

} // llvm namespace

#endif