//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  unsigned VF;     // Vectorization factor.
  bool IsScalable; // True if the function is a scalable function.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, IsScalable, Parameters) ==
           std::tie(Other.VF, Other.IsScalable, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, ElementCount::getFixed(1),
                        /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
  }
  /// Sanity check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};
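
// A minimal construction sketch (illustrative only, not part of the
// interface): `CI` is assumed to be a CallInst with at least two arguments.
//
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   // Refine the second parameter to be uniform across lanes.
//   VFParameter Uniform = {/*ParamPos=*/1, VFParamKind::OMP_Uniform};
//   Shape.updateParam(Uniform);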

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.

  // Comparison operator.
  bool operator==(const VFInfo &Other) const {
    return std::tie(Shape, ScalarName, VectorName, ISA) ==
           std::tie(Other.Shape, Other.ScalarName, Other.VectorName, Other.ISA);
  }
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for a vector function that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
///   <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
///   _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
///   https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
/// function that cannot be retrieved from the mangled
/// name. At the moment, this parameter is needed only to retrieve the
/// Vectorization Factor of scalable vector functions from their
/// respective IR declarations.
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);
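
// Demangling sketch (illustrative; the mangled name and the symbols it
// refers to are hypothetical):
//
//   // 2-lane, unpredicated, one vector parameter, redirecting the scalar
//   // "sin" to "vector_sin".
//   Optional<VFInfo> Info =
//       VFABI::tryDemangleForVFABI("_ZGV_LLVM_N2v_sin(vector_sin)", M);
//   if (Info)
//     assert(Info->Shape.VF == 2 && Info->VectorName == "vector_sin");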

/// This routine mangles the given VectorName according to the LangRef
/// specification for vector-function-abi-variant attribute and is specific to
/// the TLI mappings. It is the responsibility of the caller to make sure that
/// this is only used if all parameters in the vector function are vector type.
/// This returned string holds scalar-to-vector mapping:
///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
///          field of the `VecDesc` struct.
/// <vparams> = "v", repeated numArgs times.
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, unsigned VF);
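
// For instance (an illustrative sketch; "vector_sin" is a hypothetical TLI
// mapping of "sin" with one argument and VF = 4), the components above
// compose as follows:
//
//   std::string Mangled =
//       VFABI::mangleTLIVectorName("vector_sin", "sin", /*numArgs=*/1,
//                                  /*VF=*/4);
//   // Mangled == "_ZGV_LLVM_N4v_sin(vector_sin)"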

/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector functions descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rule of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const Optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(Shape.getValue());
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
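
// Query sketch (illustrative): look up a 2-lane, unpredicated variant of the
// callee of `CI`, falling back to the scalar callee if no such mapping
// exists.
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(2),
//                                /*HasGlobalPred=*/false);
//   Function *VecFn = DB.getVectorizedFunction(Shape);
//   if (!VecFn)
//     VecFn = DB.getVectorizedFunction(VFShape::getScalarShape(CI));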

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}
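
// For example (illustrative; `Ctx` is assumed to be an LLVMContext):
//
//   Type *I32 = Type::getInt32Ty(Ctx);
//   Type *V4I32 = ToVectorTy(I32, 4);                        // <4 x i32>
//   Type *Same = ToVectorTy(I32, ElementCount::getFixed(1)); // i32 (scalar)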

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if it is not found, returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);
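
// Illustrative sketch of the convention above:
//
//   int Idx = getSplatIndex({0, 0, 0, 0});  // Idx == 0 (splat of lane 0)
//   int Mix = getSplatIndex({0, 1, -1, 0}); // Mix == -1 (not a splat)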

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);
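
// A round-trip sketch for the two helpers above, reusing the documented
// example values (illustrative only):
//
//   SmallVector<int, 16> Narrow;
//   narrowShuffleMaskElts(4, {3, 2, 0, -1}, Narrow);
//   // Narrow == {12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1}
//
//   SmallVector<int, 4> Wide;
//   bool Ok = widenShuffleMaskElts(4, Narrow, Wide);
//   // Ok == true, Wide == {3, 2, 0, -1}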

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for M equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
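
// Sample results for the mask helpers above, taken from the documented
// examples (illustrative; undef lanes are assumed to be encoded as -1 in the
// returned mask):
//
//   createReplicatedMask(3, 4);    // {0,0,0,1,1,1,2,2,2,3,3,3}
//   createInterleaveMask(4, 2);    // {0,4,1,5,2,6,3,7}
//   createStrideMask(0, 2, 4);     // {0,2,4,6}
//   createSequentialMask(0, 4, 4); // {0,1,2,3,-1,-1,-1,-1}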

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than interleaved factor, which is equal to the absolute
/// value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];     // Member of index 0
///          b = A[i+1];   // Member of index 1
///          d = A[i+3];   // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;   // Member of index 0
///          A[i+1] = b;   // Member of index 1
///          A[i+2] = c;   // Member of index 2
///          A[i+3] = d;   // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  LLVM_ATTRIBUTE_DEPRECATED(uint32_t getAlignment() const,
                            "Use getAlign instead.") {
    return Alignment.value();
  }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if it contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMedata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last-member
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32        // Insert Position
  //      %add = add i32 %even    // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32          // Def of %odd
  //      store i32 %odd          // Insert Position
  InstTy *InsertPos;
};

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if it doesn't have such a group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};
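
// Typical driver sketch (illustrative; PSE, L, DT, LI and LAI are assumed to
// come from the surrounding loop analyses):
//
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   for (auto *Group : IAI.getInterleaveGroups())
//     if (Group->requiresScalarEpilogue())
//       ; // e.g. plan a scalar epilogue iteration for the gaps.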

} // llvm namespace

#endif