Update prebuilt Clang to r416183b from Android.
https://android.googlesource.com/platform/prebuilts/clang/host/
linux-x86/+/06a71ddac05c22edb2d10b590e1769b3f8619bef
clang 12.0.5 (based on r416183b) from build 7284624.
Change-Id: I277a316abcf47307562d8b748b84870f31a72866
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/linux-x64/clang/include/polly/CodeGen/BlockGenerators.h b/linux-x64/clang/include/polly/CodeGen/BlockGenerators.h
new file mode 100644
index 0000000..f2c52c8
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/BlockGenerators.h
@@ -0,0 +1,973 @@
+//===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the BlockGenerator and VectorBlockGenerator classes, which
+// generate sequential code and vectorized code for a polyhedral statement,
+// respectively.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_BLOCK_GENERATORS_H
+#define POLLY_BLOCK_GENERATORS_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/Support/ScopHelper.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "isl/isl-noexceptions.h"
+
+namespace polly {
+using namespace llvm;
+class MemoryAccess;
+class ScopArrayInfo;
+class IslExprBuilder;
+
+/// Generate a new basic block for a polyhedral statement.
+class BlockGenerator {
+public:
+ typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;
+
+ /// Map types to resolve scalar dependences.
+ ///
+ ///@{
+ using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>;
+
+ /// Simple vector of instructions to store escape users.
+ using EscapeUserVectorTy = SmallVector<Instruction *, 4>;
+
+ /// Map type to resolve escaping users for scalar instructions.
+ ///
+ /// @see The EscapeMap member.
+ using EscapeUsersAllocaMapTy =
+ MapVector<Instruction *,
+ std::pair<AssertingVH<Value>, EscapeUserVectorTy>>;
+
+ ///@}
+
+ /// Create a generator for basic blocks.
+ ///
+ /// @param Builder The LLVM-IR Builder used to generate the statement. The
+ /// code is generated at the location, the Builder points
+ /// to.
+ /// @param LI The loop info for the current function
+ /// @param SE The scalar evolution info for the current function
+ /// @param DT The dominator tree of this function.
+ /// @param ScalarMap Map from scalars to their demoted location.
+ /// @param EscapeMap Map from scalars to their escape users and locations.
+ /// @param GlobalMap A mapping from llvm::Values used in the original scop
+ /// region to a new set of llvm::Values. Each reference to
+ /// an original value appearing in this mapping is replaced
+ /// with the new value it is mapped to.
+ /// @param ExprBuilder An expression builder to generate new access functions.
+ /// @param StartBlock The first basic block after the RTC.
+ BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
+ DominatorTree &DT, AllocaMapTy &ScalarMap,
+ EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
+ IslExprBuilder *ExprBuilder, BasicBlock *StartBlock);
+
+ /// Copy the basic block.
+ ///
+ /// This copies the entire basic block and updates references to old values
+ /// with references to new values, as defined by GlobalMap.
+ ///
+ /// @param Stmt The block statement to code generate.
+ /// @param LTS A map from old loops to new induction variables as
+ /// SCEVs.
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
+ isl_id_to_ast_expr *NewAccesses);
+
+ /// Remove a ScopArrayInfo's allocation from the ScalarMap.
+ ///
+ /// This function allows to remove values from the ScalarMap. This is useful
+ /// if the corresponding alloca instruction will be deleted (or moved into
+ /// another module), as without removing these values the underlying
+ /// AssertingVH will trigger due to us still keeping reference to this
+ /// scalar.
+ ///
+ /// @param Array The array for which the alloca was generated.
+ void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); }
+
+ /// Return the alloca for @p Access.
+ ///
+ /// If no alloca was mapped for @p Access a new one is created.
+ ///
+ /// @param Access The memory access for which to generate the alloca.
+ ///
+ /// @returns The alloca for @p Access or a replacement value taken from
+ /// GlobalMap.
+ Value *getOrCreateAlloca(const MemoryAccess &Access);
+
+ /// Return the alloca for @p Array.
+ ///
+ /// If no alloca was mapped for @p Array a new one is created.
+ ///
+ /// @param Array The array for which to generate the alloca.
+ ///
+ /// @returns The alloca for @p Array or a replacement value taken from
+ /// GlobalMap.
+ Value *getOrCreateAlloca(const ScopArrayInfo *Array);
+
+ /// Finalize the code generation for the SCoP @p S.
+ ///
+ /// This will initialize and finalize the scalar variables we demoted during
+ /// the code generation.
+ ///
+ /// @see createScalarInitialization(Scop &)
+ /// @see createScalarFinalization(Region &)
+ void finalizeSCoP(Scop &S);
+
+ /// An empty destructor
+ virtual ~BlockGenerator() {}
+
+ BlockGenerator(const BlockGenerator &) = default;
+
+protected:
+ PollyIRBuilder &Builder;
+ LoopInfo &LI;
+ ScalarEvolution &SE;
+ IslExprBuilder *ExprBuilder;
+
+ /// The dominator tree of this function.
+ DominatorTree &DT;
+
+ /// The entry block of the current function.
+ BasicBlock *EntryBB;
+
+ /// Map to resolve scalar dependences for PHI operands and scalars.
+ ///
+ /// When translating code that contains scalar dependences as they result from
+ /// inter-block scalar dependences (including the use of data carrying PHI
+ /// nodes), we do not directly regenerate in-register SSA code, but instead
+ /// allocate some stack memory through which these scalar values are passed.
+ /// Only a later pass of -mem2reg will then (re)introduce in-register
+ /// computations.
+ ///
+ /// To keep track of the memory location(s) used to store the data computed by
+ /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a
+ /// given ScopArrayInfo to the chunk of stack allocated memory, that is
+ /// used for code generation.
+ ///
+ /// Up to two different ScopArrayInfo objects are associated with each
+ /// llvm::Value:
+ ///
+ /// MemoryType::Value objects are used for normal scalar dependences that go
+ /// from a scalar definition to its use. Such dependences are lowered by
+ /// directly writing the value an instruction computes into the corresponding
+ /// chunk of memory and reading it back from this chunk of memory right before
+ /// every use of this original scalar value. The memory allocations for
+ /// MemoryType::Value objects end with '.s2a'.
+ ///
+ /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI
+ /// nodes. For each PHI node we introduce, besides the Array of type
+ /// MemoryType::Value, a second chunk of memory into which we write at the end
+ /// of each basic block preceding the PHI instruction the value passed
+ /// through this basic block. At the place where the PHI node is executed, we
+ /// replace the PHI node with a load from the corresponding MemoryType::PHI
+ /// memory location. The memory allocations for MemoryType::PHI end with
+ /// '.phiops'.
+ ///
+ /// Example:
+ ///
+ /// Input C Code
+ /// ============
+ ///
+ /// S1: x1 = ...
+ /// for (i=0...N) {
+ /// S2: x2 = phi(x1, add)
+ /// S3: add = x2 + 42;
+ /// }
+ /// S4: print(x1)
+ /// print(x2)
+ /// print(add)
+ ///
+ ///
+ /// Unmodified IR IR After expansion
+ /// ============= ==================
+ ///
+ /// S1: x1 = ... S1: x1 = ...
+ /// x1.s2a = x1
+ /// x2.phiops = x1
+ /// | |
+ /// | <--<--<--<--< | <--<--<--<--<
+ /// | / \ | / \ .
+ /// V V \ V V \ .
+ /// S2: x2 = phi (x1, add) | S2: x2 = x2.phiops |
+ /// | x2.s2a = x2 |
+ /// | |
+ /// S3: add = x2 + 42 | S3: add = x2 + 42 |
+ /// | add.s2a = add |
+ /// | x2.phiops = add |
+ /// | \ / | \ /
+ /// | \ / | \ /
+ /// | >-->-->-->--> | >-->-->-->-->
+ /// V V
+ ///
+ /// S4: x1 = x1.s2a
+ /// S4: ... = x1 ... = x1
+ /// x2 = x2.s2a
+ /// ... = x2 ... = x2
+ /// add = add.s2a
+ /// ... = add ... = add
+ ///
+ /// ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a,
+ /// add:Value -> add.s2a, x2:PHI -> x2.phiops }
+ ///
+ /// ??? Why does a PHI-node require two memory chunks ???
+ ///
+ /// One may wonder why a PHI node requires two memory chunks instead of
+ /// storing all data in a single location. The following example tries
+ /// to store all data in .s2a and drops the .phiops location:
+ ///
+ /// S1: x1 = ...
+ /// x1.s2a = x1
+ /// x2.s2a = x1 // use .s2a instead of .phiops
+ /// |
+ /// | <--<--<--<--<
+ /// | / \ .
+ /// V V \ .
+ /// S2: x2 = x2.s2a | // value is same as above, but read
+ /// | // from .s2a
+ /// |
+ /// x2.s2a = x2 | // store into .s2a as normal
+ /// |
+ /// S3: add = x2 + 42 |
+ /// add.s2a = add |
+ /// x2.s2a = add | // use s2a instead of .phiops
+ /// | \ / // !!! This is wrong, as x2.s2a now
+ /// | >-->-->-->--> // contains add instead of x2.
+ /// V
+ ///
+ /// S4: x1 = x1.s2a
+ /// ... = x1
+ /// x2 = x2.s2a // !!! We now read 'add' instead of
+ /// ... = x2 // 'x2'
+ /// add = add.s2a
+ /// ... = add
+ ///
+ /// As visible in the example, the SSA value of the PHI node may still be
+ /// needed _after_ the basic block, which could conceptually branch to the
+ /// PHI node, has been run and has overwritten the PHI's old value. Hence, a
+ /// single memory location is not enough to code-generate a PHI node.
+ ///
+ /// Memory locations used for the special PHI node modeling.
+ AllocaMapTy &ScalarMap;
+
+ /// Map from instructions to their escape users as well as the alloca.
+ EscapeUsersAllocaMapTy &EscapeMap;
+
+ /// A map from llvm::Values referenced in the old code to a new set of
+ /// llvm::Values, which is used to replace these old values during
+ /// code generation.
+ ValueMapT &GlobalMap;
+
+ /// The first basic block after the RTC.
+ BasicBlock *StartBlock;
+
+ /// Split @p BB to create a new one we can use to clone @p BB in.
+ BasicBlock *splitBB(BasicBlock *BB);
+
+ /// Copy the given basic block.
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param BB The basic block to code generate.
+ /// @param BBMap A mapping from old values to their new values in this
+ /// block.
+ /// @param LTS A map from old loops to new induction variables as
+ /// SCEVs.
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ ///
+ /// @returns The copy of the basic block.
+ BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap,
+ LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
+
+ /// Copy the given basic block.
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param BB The basic block to code generate.
+ /// @param BBCopy The new basic block to generate code in.
+ /// @param BBMap A mapping from old values to their new values in this
+ /// block.
+ /// @param LTS A map from old loops to new induction variables as
+ /// SCEVs.
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
+ ValueMapT &BBMap, LoopToScevMapT &LTS,
+ isl_id_to_ast_expr *NewAccesses);
+
+ /// Generate reload of scalars demoted to memory and needed by @p Stmt.
+ ///
+ /// @param Stmt The statement we generate code for.
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values.
+ /// @param BBMap A mapping from old values to their new values in this block.
+ /// @param NewAccesses A map from memory access ids to new ast expressions.
+ void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT &LTS,
+ ValueMapT &BBMap,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// When statement tracing is enabled, build the print instructions for
+ /// printing the current statement instance.
+ ///
+ /// The printed output looks like:
+ ///
+ /// Stmt1(0)
+ ///
+ /// If printing of scalars is enabled, it also appends the value of each
+ /// scalar to the line:
+ ///
+ /// Stmt1(0) %i=1 %sum=5
+ ///
+ /// @param Stmt The statement we generate code for.
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values.
+ /// @param BBMap A mapping from old values to their new values in this block.
+ void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
+ ValueMapT &BBMap);
+
+ /// Generate instructions that compute whether one instance of @p Subdomain
+ /// is executed.
+ ///
+ /// @param Stmt The statement we generate code for.
+ /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in
+ /// @p Stmt's domain are ignored.
+ ///
+ /// @return An expression of type i1, generated into the current builder
+ /// position, that evaluates to 1 if the executed instance is part of
+ /// @p Subdomain.
+ Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain);
+
+ /// Generate code that executes in a subset of @p Stmt's domain.
+ ///
+ /// @param Stmt The statement we generate code for.
+ /// @param Subdomain The condition for some code to be executed.
+ /// @param Subject A name for the code that is executed
+ /// conditionally. Used to name new basic blocks and
+ /// instructions.
+ /// @param GenThenFunc Callback which generates the code to be executed
+ /// when the current executed instance is in @p Subdomain. The
+ /// IRBuilder's position is moved to within the block that
+ /// executes conditionally for this callback.
+ void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain,
+ StringRef Subject,
+ const std::function<void()> &GenThenFunc);
+
+ /// Generate the scalar stores for the given statement.
+ ///
+ /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
+ /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
+ /// be demoted to memory.
+ ///
+ /// @param Stmt The statement we generate code for.
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block)
+ /// @param BBMap A mapping from old values to their new values in this block.
+ /// @param NewAccesses A map from memory access ids to new ast expressions.
+ virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
+ ValueMapT &BBMap,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// Handle users of @p Array outside the SCoP.
+ ///
+ /// @param S The current SCoP.
+ /// @param Array The ScopArrayInfo to handle.
+ void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array);
+
+ /// Find scalar statements that have outside users.
+ ///
+ /// We register these scalar values to later update subsequent scalar uses of
+ /// these values to either use the newly computed value from within the scop
+ /// (if the scop was executed) or the unchanged original code (if the run-time
+ /// check failed).
+ ///
+ /// @param S The scop for which to find the outside users.
+ void findOutsideUsers(Scop &S);
+
+ /// Initialize the memory of demoted scalars.
+ ///
+ /// @param S The scop for which to generate the scalar initializers.
+ void createScalarInitialization(Scop &S);
+
+ /// Create exit PHI node merges for PHI nodes with more than two edges
+ /// from inside the scop.
+ ///
+ /// For scops which have a PHI node in the exit block that has more than two
+ /// incoming edges from inside the scop region, we require some special
+ /// handling to understand which of the possible values will be passed to the
+ /// PHI node from inside the optimized version of the scop. To do so ScopInfo
+ /// models the possible incoming values as write accesses of the ScopStmts.
+ ///
+ /// This function creates corresponding code to reload the computed outgoing
+ /// value from the stack slot it has been stored into and to pass it on to the
+ /// PHI node in the original exit block.
+ ///
+ /// @param S The scop for which to generate the exiting PHI nodes.
+ void createExitPHINodeMerges(Scop &S);
+
+ /// Promote the values of demoted scalars after the SCoP.
+ ///
+ /// If a scalar value was used outside the SCoP we need to promote the value
+ /// stored in the memory cell allocated for that scalar and combine it with
+ /// the original value in the non-optimized SCoP.
+ void createScalarFinalization(Scop &S);
+
+ /// Try to synthesize a new value
+ ///
+ /// Given an old value, we try to synthesize it in a new context from its
+ /// original SCEV expression. We start from the original SCEV expression,
+ /// then replace outdated parameter and loop references, and finally
+ /// expand it to code that computes this updated expression.
+ ///
+ /// @param Stmt The statement to code generate
+ /// @param Old The old Value
+ /// @param BBMap A mapping from old values to their new values
+ /// (for values recalculated within this basic block)
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block)
+ /// @param L The loop that surrounded the instruction that referenced
+ /// this value in the original code. This loop is used to
+ /// evaluate the scalar evolution at the right scope.
+ ///
+ /// @returns o A newly synthesized value.
+ /// o NULL, if synthesizing the value failed.
+ Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
+ LoopToScevMapT &LTS, Loop *L) const;
+
+ /// Get the new version of a value.
+ ///
+ /// Given an old value, we first check if a new version of this value is
+ /// available in the BBMap or GlobalMap. In case it is not and the value can
+ /// be recomputed using SCEV, we do so. If we can not recompute a value
+ /// using SCEV, but we understand that the value is constant within the scop,
+ /// we return the old value. If the value can still not be derived, this
+ /// function will assert.
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param Old The old Value.
+ /// @param BBMap A mapping from old values to their new values
+ /// (for values recalculated within this basic block).
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block).
+ /// @param L The loop that surrounded the instruction that referenced
+ /// this value in the original code. This loop is used to
+ /// evaluate the scalar evolution at the right scope.
+ ///
+ /// @returns o The old value, if it is still valid.
+ /// o The new value, if available.
+ /// o NULL, if no value is found.
+ Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
+ LoopToScevMapT &LTS, Loop *L) const;
+
+ void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
+ LoopToScevMapT &LTS);
+
+ /// Get the innermost loop that surrounds the statement @p Stmt.
+ Loop *getLoopForStmt(const ScopStmt &Stmt) const;
+
+ /// Generate the operand address
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
+ ValueMapT &BBMap, LoopToScevMapT &LTS,
+ isl_id_to_ast_expr *NewAccesses);
+
+ /// Generate the operand address.
+ ///
+ /// @param Stmt The statement to generate code for.
+ /// @param L The innermost loop that surrounds the statement.
+ /// @param Pointer If the access expression is not changed (ie. not found
+ /// in @p LTS), use this Pointer from the original code
+ /// instead.
+ /// @param BBMap A mapping from old values to their new values.
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values.
+ /// @param NewAccesses Ahead-of-time generated access expressions.
+ /// @param Id Identifier of the MemoryAccess to generate.
+ /// @param ExpectedType The type the returned value should have.
+ ///
+ /// @return The generated address.
+ Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer,
+ ValueMapT &BBMap, LoopToScevMapT &LTS,
+ isl_id_to_ast_expr *NewAccesses,
+ __isl_take isl_id *Id, Type *ExpectedType);
+
+ /// Generate the pointer value that is accessed by @p Access.
+ ///
+ /// For write accesses, generate the target address. For read accesses,
+ /// generate the source address.
+ /// The access can be either an array access or a scalar access. In the first
+ /// case, the returned address will point to an element into that array. In
+ /// the scalar case, an alloca is used.
+ /// If a new AccessRelation is set for the MemoryAccess, the new relation will
+ /// be used.
+ ///
+ /// @param Access The access to generate a pointer for.
+ /// @param L The innermost loop that surrounds the statement.
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values.
+ /// @param BBMap A mapping from old values to their new values.
+ /// @param NewAccesses A map from memory access ids to new ast expressions.
+ ///
+ /// @return The generated address.
+ Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT &LTS,
+ ValueMapT &BBMap,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap,
+ LoopToScevMapT &LTS,
+ isl_id_to_ast_expr *NewAccesses);
+
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap,
+ LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
+
+ /// Copy a single PHI instruction.
+ ///
+ /// The implementation in the BlockGenerator is trivial, however it allows
+ /// subclasses to handle PHIs differently.
+ virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &,
+ LoopToScevMapT &) {}
+
+ /// Copy a single Instruction.
+ ///
+ /// This copies a single Instruction and updates references to old values
+ /// with references to new values, as defined by GlobalMap and BBMap.
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param Inst The instruction to copy.
+ /// @param BBMap A mapping from old values to their new values
+ /// (for values recalculated within this basic block).
+ /// @param GlobalMap A mapping from old values to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block).
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block).
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
+ LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
+
+ /// Helper to determine if @p Inst can be synthesized in @p Stmt.
+ ///
+ /// @returns false, iff @p Inst can be synthesized in @p Stmt.
+ bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst);
+
+ /// Remove dead instructions generated for BB
+ ///
+ /// @param BB The basic block code for which code has been generated.
+ /// @param BBMap A local map from old to new instructions.
+ void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap);
+
+ /// Invalidate the scalar evolution expressions for a scop.
+ ///
+ /// This function invalidates the scalar evolution results for all
+ /// instructions that are part of a given scop, and the loops
+ /// surrounding the users of merge blocks. This is necessary to ensure that
+ /// later scops do not obtain scalar evolution expressions that reference
+ /// values that earlier dominated the later scop, but have been moved in the
+ /// conditional part of an earlier scop and consequently do not any more
+ /// dominate the later scop.
+ ///
+ /// @param S The scop to invalidate.
+ void invalidateScalarEvolution(Scop &S);
+};
+
+/// Generate a new vector basic block for a polyhedral statement.
+///
+/// The only public function exposed is generate().
+class VectorBlockGenerator : BlockGenerator {
+public:
+ /// Generate a new vector basic block for a ScopStmt.
+ ///
+ /// This code generation is similar to the normal, scalar code generation,
+ /// except that each instruction is code generated for several vector lanes
+ /// at a time. If possible instructions are issued as actual vector
+ /// instructions, but e.g. for address calculation instructions we currently
+ /// generate scalar instructions for each vector lane.
+ ///
+ /// @param BlockGen A block generator object used as parent.
+ /// @param Stmt The statement to code generate.
+ /// @param VLTS A mapping from loops virtual canonical induction
+ /// variable to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block), one for each lane.
+ /// @param Schedule A map from the statement to a schedule where the
+ /// innermost dimension is the dimension of the innermost
+ /// loop containing the statement.
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
+ std::vector<LoopToScevMapT> &VLTS,
+ __isl_keep isl_map *Schedule,
+ __isl_keep isl_id_to_ast_expr *NewAccesses) {
+ VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
+ Generator.copyStmt(Stmt, NewAccesses);
+ }
+
+private:
+ // This is a vector of loop->scev maps. The first map is used for the first
+ // vector lane, ...
+ // Each map contains information about Instructions in the old ScoP, which
+ // are recalculated in the new SCoP. When copying the basic block, we replace
+ // all references to the old instructions with their recalculated values.
+ //
+ // For example, when the code generator produces this AST:
+ //
+ // for (int c1 = 0; c1 <= 1023; c1 += 1)
+ // for (int c2 = 0; c2 <= 1023; c2 += VF)
+ // for (int lane = 0; lane < VF; lane += 1)
+ // Stmt(c2 + lane + 3, c1);
+ //
+ // VLTS[lane] contains a map:
+ // "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
+ // "inner loop in the old loop nest" -> SCEV("c1").
+ std::vector<LoopToScevMapT> &VLTS;
+
+ // A map from the statement to a schedule where the innermost dimension is the
+ // dimension of the innermost loop containing the statement.
+ isl_map *Schedule;
+
+ VectorBlockGenerator(BlockGenerator &BlockGen,
+ std::vector<LoopToScevMapT> &VLTS,
+ __isl_keep isl_map *Schedule);
+
+ int getVectorWidth();
+
+ Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps, Loop *L);
+
+ Type *getVectorPtrTy(const Value *V, int Width);
+
+ /// Load a vector from a set of adjacent scalars
+ ///
+ /// In case a set of scalars is known to be next to each other in memory,
+ /// create a vector load that loads those scalars
+ ///
+ /// %vector_ptr = bitcast double* %p to <4 x double>*
+ /// %vec_full = load <4 x double>* %vector_ptr
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param NegativeStride This is used to indicate a -1 stride. In such
+ /// a case we load the end of a base address and
+ /// shuffle the accesses in reverse order into the
+ /// vector. By default we would do only positive
+ /// strides.
+ ///
+ /// @param NewAccesses A map from memory access ids to new ast
+ /// expressions, which may contain new access
+ /// expressions for certain memory accesses.
+ Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
+ VectorValueMapT &ScalarMaps,
+ __isl_keep isl_id_to_ast_expr *NewAccesses,
+ bool NegativeStride);
+
+ /// Load a vector initialized from a single scalar in memory
+ ///
+ /// In case all elements of a vector are initialized to the same
+ /// scalar value, this value is loaded and shuffled into all elements
+ /// of the vector.
+ ///
+ /// %splat_one = load <1 x double>* %p
+ /// %splat = shufflevector <1 x double> %splat_one, <1 x
+ /// double> %splat_one, <4 x i32> zeroinitializer
+ ///
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
+ ValueMapT &BBMap,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// Load a vector from scalars distributed in memory
+ ///
+ /// In case some scalars are distributed randomly in memory, create a vector
+ /// by loading each scalar and by inserting one after the other into the
+ /// vector.
+ ///
+ /// %scalar_1 = load double* %p_1
+ /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
+ /// %scalar_2 = load double* %p_2
+ /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_2, i32 1
+ ///
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
+ VectorValueMapT &ScalarMaps,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
+ ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
+
+ void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
+ ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
+
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
+ ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps);
+
+ bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
+
+ /// Generate vector loads for scalars.
+ ///
+ /// @param Stmt The scop statement for which to generate the loads.
+ /// @param VectorBlockMap A map that will be updated to relate the original
+ /// values with the newly generated vector loads.
+ void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
+
+ /// Verify absence of scalar stores.
+ ///
+ /// @param Stmt The scop statement to check for scalar stores.
+ void verifyNoScalarStores(ScopStmt &Stmt);
+
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain
+ /// memory accesses.
+ void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
+};
+
+/// Generator for new versions of polyhedral region statements.
+class RegionGenerator : public BlockGenerator {
+public:
+ /// Create a generator for regions.
+ ///
+ /// @param BlockGen A generator for basic blocks.
+ RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {}
+
+ virtual ~RegionGenerator() {}
+
+ /// Copy the region statement @p Stmt.
+ ///
+ /// This copies the entire region represented by @p Stmt and updates
+ /// references to old values with references to new values, as defined by
+ /// GlobalMap.
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param LTS A map from old loops to new induction variables as SCEVs.
+ void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
+ __isl_keep isl_id_to_ast_expr *IdToAstExp);
+
+private:
+ /// A map from old to the first new block in the region, that was created to
+ /// model the old basic block.
+ DenseMap<BasicBlock *, BasicBlock *> StartBlockMap;
+
+ /// A map from old to the last new block in the region, that was created to
+ /// model the old basic block.
+ DenseMap<BasicBlock *, BasicBlock *> EndBlockMap;
+
+ /// The "BBMaps" for the whole region (one for each block). In case a basic
+ /// block is code generated to multiple basic blocks (e.g., for partial
+ /// writes), the start basic block is used as index for RegionMaps.
+ DenseMap<BasicBlock *, ValueMapT> RegionMaps;
+
+ /// Mapping to remember PHI nodes that still need incoming values.
+ using PHINodePairTy = std::pair<PHINode *, PHINode *>;
+ DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;
+
+ /// Repair the dominance tree after we created a copy block for @p BB.
+ ///
+ /// @returns The immediate dominator in the DT for @p BBCopy if in the region.
+ BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);
+
+ /// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
+ ///
+ /// PHI nodes, which may have (multiple) edges that enter from outside the
+ /// non-affine subregion and even from outside the scop, are code generated as
+ /// follows:
+ ///
+ /// # Original
+ ///
+ /// Region: %A -> %exit
+ /// NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
+ ///
+ /// pre:
+ /// %val = add i64 1, 1
+ ///
+ /// A:
+ /// br label %nonaffB
+ ///
+ /// nonaffB:
+ /// %phi = phi i64 [%val, %A], [%valC, %nonaffC], [%valD, %D]
+ /// %cmp = <nonaff>
+ /// br i1 %cmp, label %D, label %nonaffC
+ ///
+ /// nonaffC:
+ /// %valC = add i64 1, 1
+ /// br i1 undef, label %D, label %nonaffB
+ ///
+ /// D:
+ /// %valD = ...
+ /// %exit_cond = <loopexit>
+ /// br i1 %exit_cond, label %nonaffB, label %exit
+ ///
+ /// exit:
+ /// ...
+ ///
+ /// - %A and %D enter from outside the non-affine region.
+ /// - %nonaffC enters from within the non-affine region.
+ ///
+ /// # New
+ ///
+ /// polly.A:
+ /// store i64 %val, i64* %phi.phiops
+ /// br label %polly.nonaffB.entry
+ ///
+ /// polly.nonaffB.entry:
+ /// %phi.phiops.reload = load i64, i64* %phi.phiops
+ /// br label %polly.nonaffB
+ ///
+ /// polly.nonaffB:
+ /// %polly.phi = phi i64 [%phi.phiops.reload, %polly.nonaffB.entry],
+ /// [%p.valC, %polly.nonaffC]
+ ///
+ /// polly.nonaffC:
+ /// %p.valC = add i64 1, 1
+ /// br i1 undef, label %polly.D, label %polly.nonaffB
+ ///
+ /// polly.D:
+ /// %p.valD = ...
+ /// store i64 %p.valD, i64* %phi.phiops
+ /// %p.exit_cond = <loopexit>
+ /// br i1 %p.exit_cond, label %polly.nonaffB, label %exit
+ ///
+ /// Values that enter the PHI from outside the non-affine region are stored
+ /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
+ /// reloaded in %polly.nonaffB.entry, a basic block generated before the
+ /// actual non-affine region.
+ ///
+ /// When generating the PHI node of the non-affine region in %polly.nonaffB,
+ /// incoming edges from outside the region are combined into a single branch
+ /// from %polly.nonaffB.entry which has as incoming value the value reloaded
+ /// from the %phi.phiops stack slot. Incoming edges from within the region
+ /// refer to the copied instructions (%p.valC) and basic blocks
+ /// (%polly.nonaffC) of the non-affine region.
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param PHI The original PHI we copy.
+ /// @param PHICopy The copy of @p PHI.
+ /// @param IncomingBB An incoming block of @p PHI.
+ /// @param LTS A map from old loops to new induction variables as
+ /// SCEVs.
+ void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy,
+ BasicBlock *IncomingBB, LoopToScevMapT &LTS);
+
+ /// Create a PHI that combines the incoming values from all incoming blocks
+ /// that are in the subregion.
+ ///
+ /// PHIs in the subregion's exit block can have incoming edges from within and
+ /// outside the subregion. This function combines the incoming values from
+ /// within the subregion to appear as if there is only one incoming edge from
+ /// the subregion (an additional exit block is created by RegionGenerator).
+ /// This is to avoid that a value is written to the .phiops location without
+ /// leaving the subregion because the exiting block has an edge back into the
+ /// subregion.
+ ///
+ /// @param MA The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in
+ /// the subregion's exit block.
+ /// @param LTS Virtual induction variable mapping.
+ /// @param BBMap A mapping from old values to their new values in this block.
+ /// @param L Loop surrounding this region statement.
+ ///
+ /// @returns The constructed PHI node.
+ PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap,
+ Loop *L);
+
+ /// Return the new value of a scalar write, creating a PHINode if
+ /// necessary.
+ ///
+ /// @param MA A scalar WRITE MemoryAccess.
+ /// @param LTS Virtual induction variable mapping.
+ /// @param BBMap A mapping from old values to their new values in this block.
+ ///
+ /// @returns The effective value of @p MA's written value when leaving the
+ /// subregion.
+ /// @see buildExitPHI
+ Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap);
+
+ /// Generate the scalar stores for the given statement.
+ ///
+ /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
+ /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
+ /// be demoted to memory.
+ ///
+ /// @param Stmt The statement we generate code for.
+ /// @param LTS A mapping from loops virtual canonical induction variable to
+ /// their new values (for values recalculated in the new SCoP,
+ /// but not within this basic block)
+ /// @param BBMap A mapping from old values to their new values in this block.
+ /// @param NewAccesses A map from memory access ids to new ast expressions,
+ /// which may contain new access expressions for certain memory accesses.
+ void
+ generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
+ __isl_keep isl_id_to_ast_expr *NewAccesses) override;
+
+ /// Copy a single PHI instruction.
+ ///
+ /// This copies a single PHI instruction and updates references to old values
+ /// with references to new values, as defined by GlobalMap and BBMap.
+ ///
+ /// @param Stmt The statement to code generate.
+ /// @param Inst The PHI instruction to copy.
+ /// @param BBMap A mapping from old values to their new values
+ /// (for values recalculated within this basic block).
+ /// @param LTS A map from old loops to new induction variables as SCEVs.
+ void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst,
+ ValueMapT &BBMap,
+ LoopToScevMapT &LTS) override;
+};
+} // namespace polly
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/CodeGeneration.h b/linux-x64/clang/include/polly/CodeGen/CodeGeneration.h
new file mode 100644
index 0000000..b32f312
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/CodeGeneration.h
@@ -0,0 +1,39 @@
+//===- polly/CodeGeneration.h - The Polly code generator --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_CODEGENERATION_H
+#define POLLY_CODEGENERATION_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/ScopPass.h"
+#include "llvm/IR/PassManager.h"
+
+namespace polly {
+
+enum VectorizerChoice {
+ VECTORIZER_NONE,
+ VECTORIZER_STRIPMINE,
+ VECTORIZER_POLLY,
+};
+extern VectorizerChoice PollyVectorizerChoice;
+
+/// Mark a basic block unreachable.
+///
+/// Marks the basic block @p Block unreachable by equipping it with an
+/// UnreachableInst.
+void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder);
+
+struct CodeGenerationPass : public PassInfoMixin<CodeGenerationPass> {
+ PreservedAnalyses run(Scop &S, ScopAnalysisManager &SAM,
+ ScopStandardAnalysisResults &AR, SPMUpdater &U);
+};
+
+extern bool PerfMonitoring;
+} // namespace polly
+
+#endif // POLLY_CODEGENERATION_H
diff --git a/linux-x64/clang/include/polly/CodeGen/CodegenCleanup.h b/linux-x64/clang/include/polly/CodeGen/CodegenCleanup.h
new file mode 100644
index 0000000..a1fd680
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/CodegenCleanup.h
@@ -0,0 +1,17 @@
+#ifndef POLLY_CODEGENCLEANUP_H
+#define POLLY_CODEGENCLEANUP_H
+
+namespace llvm {
+class FunctionPass;
+class PassRegistry;
+} // namespace llvm
+
+namespace polly {
+llvm::FunctionPass *createCodegenCleanupPass();
+} // namespace polly
+
+namespace llvm {
+void initializeCodegenCleanupPass(llvm::PassRegistry &);
+} // namespace llvm
+
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/IRBuilder.h b/linux-x64/clang/include/polly/CodeGen/IRBuilder.h
new file mode 100644
index 0000000..fb5d14a
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/IRBuilder.h
@@ -0,0 +1,158 @@
+//===- Codegen/IRBuilder.h - The IR builder used by Polly -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The Polly IRBuilder file contains Polly specific extensions for the IRBuilder
+// that are used e.g. to emit the llvm.loop.parallel metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_CODEGEN_IRBUILDER_H
+#define POLLY_CODEGEN_IRBUILDER_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/IRBuilder.h"
+
+namespace llvm {
+class Loop;
+class SCEV;
+class ScalarEvolution;
+} // namespace llvm
+
+namespace polly {
+class Scop;
+
+/// Helper class to annotate newly generated SCoPs with metadata.
+///
+/// The annotations are twofold:
+/// 1) Loops are stored in a stack-like structure in the order they are
+/// constructed and the LoopID metadata node is added to the backedge.
+/// Contained memory instructions and loop headers are annotated according
+/// to all parallel surrounding loops.
+/// 2) The new SCoP is assumed alias free (either due to the result of
+/// AliasAnalysis queries or runtime alias checks). We annotate therefore
+/// all memory instructions with alias scopes to indicate that fact to
+/// later optimizations.
+/// These alias scopes live in a new alias domain only used in this SCoP.
+/// Each base pointer has its own alias scope and is annotated to not
+/// alias with any access to different base pointers.
+class ScopAnnotator {
+public:
+ ScopAnnotator();
+
+ /// Build all alias scopes for the given SCoP.
+ void buildAliasScopes(Scop &S);
+
+ /// Add a new loop @p L which is parallel if @p IsParallel is true.
+ void pushLoop(llvm::Loop *L, bool IsParallel);
+
+ /// Remove the last added loop.
+ void popLoop(bool IsParallel);
+
+ /// Annotate the new instruction @p I for all parallel loops.
+ void annotate(llvm::Instruction *I);
+
+ /// Annotate the loop latch @p B wrt. @p L.
+ void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel,
+ bool IsLoopVectorizerDisabled) const;
+
+ /// Add alternative alias base pointers.
+ ///
+ /// When annotating instructions with alias scope metadata, the right metadata
+ /// is identified through the base pointer of the memory access. In some cases
+ /// (e.g. OpenMP code generation), the base pointer of the memory accesses is
+ /// not the original base pointer, but was changed when passing the original
+ /// base pointer over a function boundary. This function allows providing a
+ /// map that maps from these new base pointers to the original base pointers
+ /// to allow the ScopAnnotator to still find the right alias scope annotations.
+ ///
+ /// @param NewMap A map from new base pointers to original base pointers.
+ void addAlternativeAliasBases(
+ llvm::DenseMap<llvm::AssertingVH<llvm::Value>,
+ llvm::AssertingVH<llvm::Value>> &NewMap) {
+ AlternativeAliasBases.insert(NewMap.begin(), NewMap.end());
+ }
+
+ /// Delete the set of alternative alias bases
+ void resetAlternativeAliasBases() { AlternativeAliasBases.clear(); }
+
+ /// Add inter iteration alias-free base pointer @p BasePtr.
+ void addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr);
+
+private:
+ /// Annotate with the second level alias metadata
+ ///
+ /// Annotate the instruction @p I with the second level alias metadata
+ /// to distinguish the individual non-aliasing accesses that have inter
+ /// iteration alias-free base pointers.
+ ///
+ /// @param I The instruction to be annotated.
+ /// @param BasePtr The base pointer of @p I.
+ void annotateSecondLevel(llvm::Instruction *I, llvm::Value *BasePtr);
+
+ /// The ScalarEvolution analysis we use to find base pointers.
+ llvm::ScalarEvolution *SE;
+
+ /// All loops currently under construction.
+ llvm::SmallVector<llvm::Loop *, 8> ActiveLoops;
+
+ /// Metadata pointing to parallel loops currently under construction.
+ llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops;
+
+ /// The alias scope domain for the current SCoP.
+ llvm::MDNode *AliasScopeDomain;
+
+ /// A map from base pointers to their alias scope.
+ llvm::MapVector<llvm::AssertingVH<llvm::Value>, llvm::MDNode *> AliasScopeMap;
+
+ /// A map from base pointers to an alias scope list of other pointers.
+ llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+ OtherAliasScopeListMap;
+
+ /// A map from pointers to second level alias scopes.
+ llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *> SecondLevelAliasScopeMap;
+
+ /// A map from pointers to second level alias scope list of other pointers.
+ llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *>
+ SecondLevelOtherAliasScopeListMap;
+
+ /// Inter iteration alias-free base pointers.
+ llvm::SmallPtrSet<llvm::Value *, 4> InterIterationAliasFreeBasePtrs;
+
+ llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::AssertingVH<llvm::Value>>
+ AlternativeAliasBases;
+};
+
+/// Add Polly specifics when running IRBuilder.
+///
+/// This is used to add additional items such as e.g. the llvm.loop.parallel
+/// metadata.
+class IRInserter final : public llvm::IRBuilderDefaultInserter {
+public:
+ IRInserter() = default;
+ IRInserter(class ScopAnnotator &A) : Annotator(&A) {}
+
+ void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
+ llvm::BasicBlock *BB,
+ llvm::BasicBlock::iterator InsertPt) const override {
+ llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt);
+ if (Annotator)
+ Annotator->annotate(I);
+ }
+
+private:
+ class ScopAnnotator *Annotator = nullptr;
+};
+
+// TODO: We should not name instructions in NDEBUG builds.
+//
+// We currently always name instructions, as the polly test suite currently
+// matches for certain names.
+typedef llvm::IRBuilder<llvm::ConstantFolder, IRInserter> PollyIRBuilder;
+
+} // namespace polly
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/IslAst.h b/linux-x64/clang/include/polly/CodeGen/IslAst.h
new file mode 100644
index 0000000..1a842b8
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/IslAst.h
@@ -0,0 +1,217 @@
+//===- IslAst.h - Interface to the isl code generator -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The isl code generator interface takes a Scop and generates an isl_ast. This
+// isl_ast can either be returned directly or it can be pretty printed to
+// stdout.
+//
+// A typical isl_ast output looks like this:
+//
+// for (c2 = max(0, ceild(n + m, 2)); c2 <= min(511, floord(5 * n, 3)); c2++) {
+// bb2(c2);
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_ISLAST_H
+#define POLLY_ISLAST_H
+
+#include "polly/ScopPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/PassManager.h"
+#include "isl/ctx.h"
+
+namespace polly {
+
+struct Dependences;
+
+class IslAst {
+public:
+ IslAst(const IslAst &) = delete;
+ IslAst &operator=(const IslAst &) = delete;
+ IslAst(IslAst &&);
+ IslAst &operator=(IslAst &&) = delete;
+ ~IslAst();
+
+ static IslAst create(Scop &Scop, const Dependences &D);
+
+ /// Print a source code representation of the program.
+ void pprint(raw_ostream &OS);
+
+ __isl_give isl_ast_node *getAst();
+
+ const std::shared_ptr<isl_ctx> getSharedIslCtx() const { return Ctx; }
+
+ /// Get the run-time conditions for the Scop.
+ __isl_give isl_ast_expr *getRunCondition();
+
+ /// Build run-time condition for scop.
+ ///
+ /// @param S The scop to build the condition for.
+ /// @param Build The isl_ast_build object to use to build the condition.
+ ///
+ /// @returns An ast expression that describes the necessary run-time check.
+ static isl_ast_expr *buildRunCondition(Scop &S,
+ __isl_keep isl_ast_build *Build);
+
+private:
+ Scop &S;
+ isl_ast_node *Root = nullptr;
+ isl_ast_expr *RunCondition = nullptr;
+ std::shared_ptr<isl_ctx> Ctx;
+
+ IslAst(Scop &Scop);
+
+ void init(const Dependences &D);
+};
+
+class IslAstInfo {
+public:
+ using MemoryAccessSet = SmallPtrSet<MemoryAccess *, 4>;
+
+ /// Payload information used to annotate an AST node.
+ struct IslAstUserPayload {
+ /// Construct and initialize the payload.
+ IslAstUserPayload() = default;
+
+ /// Cleanup all isl structs on destruction.
+ ~IslAstUserPayload();
+
+ /// Does the dependence analysis determine that there are no loop-carried
+ /// dependencies?
+ bool IsParallel = false;
+
+ /// Flag to mark innermost loops.
+ bool IsInnermost = false;
+
+ /// Flag to mark innermost parallel loops.
+ bool IsInnermostParallel = false;
+
+ /// Flag to mark outermost parallel loops.
+ bool IsOutermostParallel = false;
+
+ /// Flag to mark parallel loops which break reductions.
+ bool IsReductionParallel = false;
+
+ /// The minimal dependence distance for non-parallel loops.
+ isl::pw_aff MinimalDependenceDistance;
+
+ /// The build environment at the time this node was constructed.
+ isl_ast_build *Build = nullptr;
+
+ /// Set of accesses which break reduction dependences.
+ MemoryAccessSet BrokenReductions;
+ };
+
+private:
+ Scop &S;
+ IslAst Ast;
+
+public:
+ IslAstInfo(Scop &S, const Dependences &D) : S(S), Ast(IslAst::create(S, D)) {}
+
+ /// Return the isl AST computed by this IslAstInfo.
+ IslAst &getIslAst() { return Ast; }
+
+ /// Return a copy of the AST root node.
+ __isl_give isl_ast_node *getAst();
+
+ /// Get the run condition.
+ ///
+ /// Only if the run condition evaluates at run-time to a non-zero value, the
+ /// assumptions that have been taken hold. If the run condition evaluates to
+ /// zero/false some assumptions do not hold and the original code needs to
+ /// be executed.
+ __isl_give isl_ast_expr *getRunCondition();
+
+ void print(raw_ostream &O);
+
+ /// @name Extract information attached to an isl ast (for) node.
+ ///
+ ///@{
+ /// Get the complete payload attached to @p Node.
+ static IslAstUserPayload *getNodePayload(__isl_keep isl_ast_node *Node);
+
+ /// Is this loop an innermost loop?
+ static bool isInnermost(__isl_keep isl_ast_node *Node);
+
+ /// Is this loop a parallel loop?
+ static bool isParallel(__isl_keep isl_ast_node *Node);
+
+ /// Is this loop an outermost parallel loop?
+ static bool isOutermostParallel(__isl_keep isl_ast_node *Node);
+
+ /// Is this loop an innermost parallel loop?
+ static bool isInnermostParallel(__isl_keep isl_ast_node *Node);
+
+ /// Is this loop a reduction parallel loop?
+ static bool isReductionParallel(__isl_keep isl_ast_node *Node);
+
+ /// Will the loop be run as thread parallel?
+ static bool isExecutedInParallel(__isl_keep isl_ast_node *Node);
+
+ /// Get the node's schedule or a nullptr if not available.
+ static __isl_give isl_union_map *getSchedule(__isl_keep isl_ast_node *Node);
+
+ /// Get minimal dependence distance or nullptr if not available.
+ static __isl_give isl_pw_aff *
+ getMinimalDependenceDistance(__isl_keep isl_ast_node *Node);
+
+ /// Get the node's broken reductions or a nullptr if not available.
+ static MemoryAccessSet *getBrokenReductions(__isl_keep isl_ast_node *Node);
+
+ /// Get the node's build context or a nullptr if not available.
+ static __isl_give isl_ast_build *getBuild(__isl_keep isl_ast_node *Node);
+
+ ///@}
+};
+
+struct IslAstAnalysis : public AnalysisInfoMixin<IslAstAnalysis> {
+ static AnalysisKey Key;
+
+ using Result = IslAstInfo;
+
+ IslAstInfo run(Scop &S, ScopAnalysisManager &SAM,
+ ScopStandardAnalysisResults &SAR);
+};
+
+class IslAstInfoWrapperPass : public ScopPass {
+ std::unique_ptr<IslAstInfo> Ast;
+
+public:
+ static char ID;
+
+ IslAstInfoWrapperPass() : ScopPass(ID) {}
+
+ IslAstInfo &getAI() { return *Ast; }
+ const IslAstInfo &getAI() const { return *Ast; }
+
+ /// Build the AST for the given SCoP @p S.
+ bool runOnScop(Scop &S) override;
+
+ /// Register all analyses and transformation required.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Release the internal memory.
+ void releaseMemory() override;
+
+ /// Print a source code representation of the program.
+ void printScop(raw_ostream &OS, Scop &S) const override;
+};
+
+struct IslAstPrinterPass : public PassInfoMixin<IslAstPrinterPass> {
+ IslAstPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Scop &S, ScopAnalysisManager &SAM,
+ ScopStandardAnalysisResults &, SPMUpdater &U);
+
+ raw_ostream &OS;
+};
+} // namespace polly
+
+#endif // POLLY_ISLAST_H
diff --git a/linux-x64/clang/include/polly/CodeGen/IslExprBuilder.h b/linux-x64/clang/include/polly/CodeGen/IslExprBuilder.h
new file mode 100644
index 0000000..998f8f6
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/IslExprBuilder.h
@@ -0,0 +1,267 @@
+//===-IslExprBuilder.h - Helper to generate code for isl AST expressions --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_ISL_EXPR_BUILDER_H
+#define POLLY_ISL_EXPR_BUILDER_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/Support/ScopHelper.h"
+#include "isl/isl-noexceptions.h"
+
+namespace llvm {
+// Provide PointerLikeTypeTraits for isl_id. No low pointer bits are claimed as
+// available (NumLowBitsAvailable is 0).
+template <> struct PointerLikeTypeTraits<isl_id *> {
+ static inline void *getAsVoidPointer(isl_id *P) { return P; }
+ // Convert back to isl_id *, the pointer type this specialization describes.
+ static inline isl_id *getFromVoidPointer(void *P) {
+ return static_cast<isl_id *>(P);
+ }
+ static constexpr int NumLowBitsAvailable = 0;
+};
+} // namespace llvm
+
+namespace polly {
+class ScopArrayInfo;
+
+/// LLVM-IR generator for isl_ast_expr[essions]
+///
+/// This generator generates LLVM-IR that performs the computation described by
+/// an isl_ast_expr[ession].
+///
+/// Example:
+///
+/// An isl_ast_expr[ession] can look like this:
+///
+/// (N + M) + 10
+///
+/// The IslExprBuilder could create the following LLVM-IR:
+///
+/// %tmp1 = add nsw i64 %N
+/// %tmp2 = add nsw i64 %tmp1, %M
+/// %tmp3 = add nsw i64 %tmp2, 10
+///
+/// The implementation of this class is mostly a mapping from isl_ast_expr
+/// constructs to the corresponding LLVM-IR constructs.
+///
+/// The following decisions may need some explanation:
+///
+/// 1) Which data-type to choose
+///
+/// isl_ast_expr[essions] are untyped expressions that assume arbitrary
+/// precision integer computations. LLVM-IR instead has fixed size integers.
+/// When lowering to LLVM-IR we need to choose both the size of the data type and
+/// the sign of the operations we use.
+///
+/// At the moment, we hardcode 64 bit signed computations. Our experience has
+/// shown that 64 bits are generally large enough for the loop bounds that appear
+/// in the wild. Signed computations are needed, as loop bounds may become
+/// negative.
+///
+/// It is possible to track overflows that occurred in the generated IR. See the
+/// description of OverflowState for more information.
+///
+/// FIXME: Hardcoding sizes can cause issues:
+///
+/// - On embedded systems and especially for high-level-synthesis 64 bit
+/// computations are very costly.
+///
+/// The right approach is to compute the minimal necessary bitwidth and
+/// signedness for each subexpression during the isl AST generation and
+/// to use this information in our IslAstGenerator. Preliminary patches are
+/// available, but have not been committed yet.
+///
+class IslExprBuilder {
+public:
+ /// A map from isl_ids to llvm::Values.
+ typedef llvm::MapVector<isl_id *, llvm::AssertingVH<llvm::Value>> IDToValueTy;
+
+ typedef llvm::MapVector<isl_id *, const ScopArrayInfo *> IDToScopArrayInfoTy;
+
+ /// A map from isl_ids to ScopArrayInfo objects.
+ ///
+ /// This map is used to obtain ScopArrayInfo objects for isl_ids which do not
+ /// carry a ScopArrayInfo object in their user pointer. This is useful if the
+ /// construction of ScopArrayInfo objects happens only after references (e.g.
+ /// in an AST) to an isl_id are generated and the user pointer of the isl_id
+ /// can not be changed any more.
+ ///
+ /// This is useful for external users who just use the IslExprBuilder for
+ /// code generation.
+ IDToScopArrayInfoTy *IDToSAI = nullptr;
+
+ /// Set the isl_id to ScopArrayInfo map.
+ ///
+ /// @param NewIDToSAI The new isl_id to ScopArrayInfo map to use.
+ void setIDToSAI(IDToScopArrayInfoTy *NewIDToSAI) { IDToSAI = NewIDToSAI; }
+
+ /// Construct an IslExprBuilder.
+ ///
+ /// @param Builder The IRBuilder used to construct the
+ /// isl_ast_expr[ession]. The insert location of this
+ /// IRBuilder defines WHERE the corresponding LLVM-IR
+ /// is generated.
+ /// @param IDToValue The isl_ast_expr[ession] may reference parameters or
+ /// variables (identified by an isl_id). The IDToValue map
+ /// specifies the LLVM-IR Values that correspond to these
+ /// parameters and variables.
+ /// @param GlobalMap A mapping from llvm::Values used in the original scop
+ /// region to a new set of llvm::Values.
+ /// @param DL DataLayout for the current Module.
+ /// @param SE ScalarEvolution analysis for the current function.
+ /// @param DT DominatorTree analysis for the current function.
+ /// @param LI LoopInfo analysis for the current function.
+ /// @param StartBlock The first basic block after the RTC.
+ IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue,
+ ValueMapT &GlobalMap, const llvm::DataLayout &DL,
+ llvm::ScalarEvolution &SE, llvm::DominatorTree &DT,
+ llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock);
+
+ /// Create LLVM-IR for an isl_ast_expr[ession].
+ ///
+ /// @param Expr The ast expression for which we generate LLVM-IR.
+ ///
+ /// @return The llvm::Value* containing the result of the computation.
+ llvm::Value *create(__isl_take isl_ast_expr *Expr);
+
+ /// Return the largest of two types.
+ ///
+ /// @param T1 The first type.
+ /// @param T2 The second type.
+ ///
+ /// @return The largest of the two types.
+ llvm::Type *getWidestType(llvm::Type *T1, llvm::Type *T2);
+
+ /// Return the type with which this expression should be computed.
+ ///
+ /// The type needs to be large enough to hold all possible input and all
+ /// possible output values.
+ ///
+ /// @param Expr The expression for which to find the type.
+ /// @return The type with which the expression should be computed.
+ llvm::IntegerType *getType(__isl_keep isl_ast_expr *Expr);
+
+ /// Change if runtime overflows are tracked or not.
+ ///
+ /// @param Enable Flag to enable/disable the tracking.
+ ///
+ /// Note that this will reset the tracking state and that tracking is only
+ /// allowed if the last tracked expression dominates the current insert point.
+ void setTrackOverflow(bool Enable);
+
+ /// Return the current overflow status or nullptr if it is not tracked.
+ ///
+ /// @return A nullptr if tracking is disabled or otherwise an i1 that has the
+ /// value of "0" if and only if no overflow happened since tracking
+ /// was enabled.
+ llvm::Value *getOverflowState() const;
+
+ /// Create LLVM-IR that computes the memory location of an access expression.
+ ///
+ /// For a given isl_ast_expr[ession] of type isl_ast_op_access this function
+ /// creates IR that computes the address the access expression refers to.
+ ///
+ /// @param Expr The ast expression of type isl_ast_op_access
+ /// for which we generate LLVM-IR.
+ ///
+ /// @return The llvm::Value* containing the result of the computation.
+ llvm::Value *createAccessAddress(__isl_take isl_ast_expr *Expr);
+
+ /// Check if an @p Expr contains integer constants larger than 64 bit.
+ ///
+ /// @param Expr The expression to check.
+ ///
+ /// @return True if the ast expression is larger than 64 bit.
+ bool hasLargeInts(isl::ast_expr Expr);
+
+private:
+ Scop &S;
+
+ /// Flag that will be set if an overflow occurred at runtime.
+ ///
+ /// Note that this flag is by default a nullptr and if it is a nullptr
+ /// we will not record overflows but simply perform the computations.
+ /// The intended usage is as follows:
+ /// - If overflows in [an] expression[s] should be tracked, call
+ /// the setTrackOverflow(true) function.
+ /// - Use create(...) for all expressions that should be checked.
+ /// - Call getOverflowState() to get the value representing the current
+ /// state of the overflow flag.
+ /// - To stop tracking call setTrackOverflow(false).
+ llvm::Value *OverflowState;
+
+ PollyIRBuilder &Builder;
+ IDToValueTy &IDToValue;
+ ValueMapT &GlobalMap;
+
+ const llvm::DataLayout &DL;
+ llvm::ScalarEvolution &SE;
+ llvm::DominatorTree &DT;
+ llvm::LoopInfo &LI;
+ llvm::BasicBlock *StartBlock;
+
+ llvm::Value *createOp(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpUnary(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpAccess(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpBin(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpNAry(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpSelect(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpICmp(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpBoolean(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpBooleanConditional(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createId(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createInt(__isl_take isl_ast_expr *Expr);
+ llvm::Value *createOpAddressOf(__isl_take isl_ast_expr *Expr);
+
+ /// Create a binary operation @p Opc and track overflows if requested.
+ ///
+ /// @param Opc The binary operation that should be performed [Add/Sub/Mul].
+ /// @param LHS The left operand.
+ /// @param RHS The right operand.
+ /// @param Name The (base) name of the new IR operations.
+ ///
+ /// @return A value that represents the result of the binary operation.
+ llvm::Value *createBinOp(llvm::BinaryOperator::BinaryOps Opc,
+ llvm::Value *LHS, llvm::Value *RHS,
+ const llvm::Twine &Name);
+
+ /// Create an addition and track overflows if requested.
+ ///
+ /// @param LHS The left operand.
+ /// @param RHS The right operand.
+ /// @param Name The (base) name of the new IR operations.
+ ///
+ /// @return A value that represents the result of the addition.
+ llvm::Value *createAdd(llvm::Value *LHS, llvm::Value *RHS,
+ const llvm::Twine &Name = "");
+
+ /// Create a subtraction and track overflows if requested.
+ ///
+ /// @param LHS The left operand.
+ /// @param RHS The right operand.
+ /// @param Name The (base) name of the new IR operations.
+ ///
+ /// @return A value that represents the result of the subtraction.
+ llvm::Value *createSub(llvm::Value *LHS, llvm::Value *RHS,
+ const llvm::Twine &Name = "");
+
+ /// Create a multiplication and track overflows if requested.
+ ///
+ /// @param LHS The left operand.
+ /// @param RHS The right operand.
+ /// @param Name The (base) name of the new IR operations.
+ ///
+ /// @return A value that represents the result of the multiplication.
+ llvm::Value *createMul(llvm::Value *LHS, llvm::Value *RHS,
+ const llvm::Twine &Name = "");
+};
+} // namespace polly
+
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/IslNodeBuilder.h b/linux-x64/clang/include/polly/CodeGen/IslNodeBuilder.h
new file mode 100644
index 0000000..ac28af2
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/IslNodeBuilder.h
@@ -0,0 +1,432 @@
+//===- IslNodeBuilder.h - Translate an isl AST into a LLVM-IR AST -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IslNodeBuilder, a class to translate an isl AST into
+// an LLVM-IR AST.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_ISLNODEBUILDER_H
+#define POLLY_ISLNODEBUILDER_H
+
+#include "polly/CodeGen/BlockGenerators.h"
+#include "polly/CodeGen/IslExprBuilder.h"
+#include "polly/ScopDetectionDiagnostic.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/IR/InstrTypes.h"
+#include "isl/ctx.h"
+#include "isl/isl-noexceptions.h"
+
+using namespace llvm;
+using namespace polly;
+
+namespace polly {
+
+struct InvariantEquivClassTy;
+} // namespace polly
+
+struct SubtreeReferences { // Handed to addReferencesFromStmt() via UserPtr.
+ LoopInfo &LI;
+ ScalarEvolution &SE;
+ Scop &S;
+ ValueMapT &GlobalMap;
+ SetVector<Value *> &Values; // Presumably receives the collected values.
+ SetVector<const SCEV *> &SCEVs; // Presumably receives the collected SCEVs.
+ BlockGenerator &BlockGen;
+ // Optional (may be null): if a parameter space location is provided,
+ // parameter space information is collected as well.
+ isl::space *ParamSpace;
+};
+
+/// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
+///
+/// This includes the SCEVUnknowns referenced by the SCEVs used in the
+/// statement and the base pointers of the memory accesses. For scalar
+/// statements we force the generation of alloca memory locations and list
+/// these locations in the set of out-of-scop values as well.
+///
+/// We also collect an isl::space that includes all parameter dimensions
+/// used in the statement's memory accesses, in case the ParamSpace pointer
+/// is non-null.
+///
+/// @param Stmt The statement for which to extract the information.
+/// @param UserPtr A void pointer that can be cast to a
+/// SubtreeReferences structure.
+/// @param CreateScalarRefs Should the result include allocas of scalar
+/// references?
+void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
+ bool CreateScalarRefs = true);
+
+class IslNodeBuilder {
+public:
+ IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator,
+ const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE,
+ DominatorTree &DT, Scop &S, BasicBlock *StartBlock)
+ : S(S), Builder(Builder), Annotator(Annotator),
+ ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI,
+ StartBlock),
+ BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap,
+ &ExprBuilder, StartBlock),
+ RegionGen(BlockGen), DL(DL), LI(LI), SE(SE), DT(DT),
+ StartBlock(StartBlock) {}
+
+ virtual ~IslNodeBuilder() = default;
+
+ void addParameters(__isl_take isl_set *Context);
+
+ /// Create Values which hold the sizes of the outermost dimension of all
+ /// Fortran arrays in the current scop.
+ ///
+ /// @returns False, if a problem occurred and a Fortran array was not
+ /// materialized. True otherwise.
+ bool materializeFortranArrayOutermostDimension();
+
+ /// Generate code that evaluates @p Condition at run-time.
+ ///
+ /// This function is typically called to generate the LLVM-IR for the
+ /// run-time condition of the scop, that verifies that all the optimistic
+ /// assumptions we have taken during scop modeling and transformation
+ /// hold at run-time.
+ ///
+ /// @param Condition The condition to evaluate
+ ///
+ /// @result An llvm::Value that is true if the condition holds and false
+ /// otherwise.
+ Value *createRTC(isl_ast_expr *Condition);
+
+ void create(__isl_take isl_ast_node *Node);
+
+ /// Allocate memory for all new arrays created by Polly.
+ void allocateNewArrays(BBPair StartExitBlocks);
+
+ /// Preload all memory loads that are invariant.
+ bool preloadInvariantLoads();
+
+ /// Finalize code generation.
+ ///
+ /// @see BlockGenerator::finalizeSCoP(Scop &S)
+ virtual void finalize() { BlockGen.finalizeSCoP(S); }
+
+ IslExprBuilder &getExprBuilder() { return ExprBuilder; }
+
+ /// Get the associated block generator.
+ ///
+ /// @return A reference to the associated block generator.
+ BlockGenerator &getBlockGenerator() { return BlockGen; }
+
+ /// Return the parallel subfunctions that have been created.
+ const ArrayRef<Function *> getParallelSubfunctions() const {
+ return ParallelSubfunctions;
+ }
+
+protected:
+ Scop &S;
+ PollyIRBuilder &Builder;
+ ScopAnnotator &Annotator;
+
+ IslExprBuilder ExprBuilder;
+
+ /// Maps used by the block and region generator to demote scalars.
+ ///
+ ///@{
+
+ /// See BlockGenerator::ScalarMap.
+ BlockGenerator::AllocaMapTy ScalarMap;
+
+ /// See BlockGenerator::EscapeMap.
+ BlockGenerator::EscapeUsersAllocaMapTy EscapeMap;
+
+ ///@}
+
+ /// The generator used to copy a basic block.
+ BlockGenerator BlockGen;
+
+ /// The generator used to copy a non-affine region.
+ RegionGenerator RegionGen;
+
+ const DataLayout &DL;
+ LoopInfo &LI;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+ BasicBlock *StartBlock;
+
+ /// The current iteration of out-of-scop loops
+ ///
+ /// This map provides for a given loop a SCEV that represents the current
+ /// loop iteration.
+ MapVector<const Loop *, const SCEV *> OutsideLoopIterations;
+
+ // This maps an isl_id* to the Value* it has in the generated program. For
+ // now, the only isl_ids that are stored here are the newly calculated loop
+ // ivs.
+ IslExprBuilder::IDToValueTy IDToValue;
+
+ /// A collection of all parallel subfunctions that have been created.
+ SmallVector<Function *, 8> ParallelSubfunctions;
+
+ /// Generate code for a given SCEV*
+ ///
+ /// This function generates code for a given SCEV expression. The generated
+ /// code is emitted at the end of the basic block our Builder currently
+ /// points to and the resulting value is returned.
+ ///
+ /// @param Expr The expression to code generate.
+ Value *generateSCEV(const SCEV *Expr);
+
+ /// A set of Value -> Value remappings to apply when generating new code.
+ ///
+ /// When generating new code for a ScopStmt this map is used to map certain
+ /// llvm::Values to new llvm::Values.
+ ValueMapT ValueMap;
+
+ /// Materialize code for @p Id if it was not done before.
+ ///
+ /// @returns False, iff a problem occurred and the value was not materialized.
+ bool materializeValue(__isl_take isl_id *Id);
+
+ /// Materialize parameters of @p Set.
+ ///
+ /// @returns False, iff a problem occurred and the value was not materialized.
+ bool materializeParameters(__isl_take isl_set *Set);
+
+ /// Materialize all parameters in the current scop.
+ ///
+ /// @returns False, iff a problem occurred and the value was not materialized.
+ bool materializeParameters();
+
+ // Extract the upper bound of this loop
+ //
+ // The isl code generation can generate arbitrary expressions to check if the
+ // upper bound of a loop is reached, but it provides an option to enforce
+ // 'atomic' upper bounds. An 'atomic' upper bound is always of the form
+ // iv <= expr, where expr is an (arbitrary) expression not containing iv.
+ //
+ // This function extracts 'atomic' upper bounds. Polly, in general, requires
+ // atomic upper bounds for the following reasons:
+ //
+ // 1. An atomic upper bound is loop invariant
+ //
+ // It need not be calculated at each loop iteration and can often even be
+ // hoisted out further by the loop invariant code motion.
+ //
+ // 2. OpenMP needs a loop invariant upper bound to calculate the number
+ // of loop iterations.
+ //
+ // 3. With the existing code, upper bounds have been easier to implement.
+ isl::ast_expr getUpperBound(isl::ast_node For, CmpInst::Predicate &Predicate);
+
+ /// Return non-negative number of iterations in case of the following form
+ /// of a loop and -1 otherwise.
+ ///
+ /// for (i = 0; i <= NumIter; i++) {
+ /// loop body;
+ /// }
+ ///
+ /// NumIter is a non-negative integer value. Condition can have
+ /// isl_ast_op_lt type.
+ int getNumberOfIterations(isl::ast_node For);
+
+ /// Compute the values and loops referenced in this subtree.
+ ///
+ /// This function looks at all ScopStmts scheduled below the provided For node
+ /// and finds the llvm::Value[s] and llvm::Loop[s] which are referenced but
+ /// not locally defined.
+ ///
+ /// Values that can be synthesized or that are available as globals are
+ /// considered locally defined.
+ ///
+ /// Loops that contain the scop or that are part of the scop are considered
+ /// locally defined. Loops that are before the scop, but do not contain the
+ /// scop itself are considered not locally defined.
+ ///
+ /// @param For The node defining the subtree.
+ /// @param Values A vector that will be filled with the Values referenced in
+ /// this subtree.
+ /// @param Loops A vector that will be filled with the Loops referenced in
+ /// this subtree.
+ void getReferencesInSubtree(__isl_keep isl_ast_node *For,
+ SetVector<Value *> &Values,
+ SetVector<const Loop *> &Loops);
+
+ /// Change the llvm::Value(s) used for code generation.
+ ///
+ /// When generating code certain values (e.g., references to induction
+ /// variables or array base pointers) in the original code may be replaced by
+ /// new values. This function allows (partially) updating the set of values
+ /// used. A typical use case for this function is the case when we continue
+ /// code generation in a subfunction/kernel function and need to explicitly
+ /// pass down certain values.
+ ///
+ /// @param NewValues A map that maps certain llvm::Values to new llvm::Values.
+ void updateValues(ValueMapT &NewValues);
+
+ /// Return the most up-to-date version of the llvm::Value for code generation.
+ /// @param Original The Value to check for an up to date version.
+ /// @returns A remapped `Value` from ValueMap, or `Original` if no mapping
+ /// exists.
+ /// @see IslNodeBuilder::updateValues
+ /// @see IslNodeBuilder::ValueMap
+ Value *getLatestValue(Value *Original) const;
+
+ /// Generate code for a marker now.
+ ///
+ /// For mark nodes with an unknown name, we just forward the code generation
+ /// to its child. This is currently the only behavior implemented, as there is
+ /// currently no special handling for marker nodes implemented.
+ ///
+ /// @param Marker The node we generate code for.
+ virtual void createMark(__isl_take isl_ast_node *Marker);
+
+ virtual void createFor(__isl_take isl_ast_node *For);
+
+ /// Set to remember materialized invariant loads.
+ ///
+ /// An invariant load is identified by its pointer (the SCEV) and its type.
+ SmallSet<std::pair<const SCEV *, Type *>, 16> PreloadedPtrs;
+
+ /// Preload the memory access at @p AccessRange with @p Build.
+ ///
+ /// @returns The preloaded value cast to type @p Ty
+ Value *preloadUnconditionally(__isl_take isl_set *AccessRange,
+ isl_ast_build *Build, Instruction *AccInst);
+
+ /// Preload the memory load access @p MA.
+ ///
+ /// If @p MA is not always executed it will be conditionally loaded and
+ /// merged with undef of the same type. Hence, if @p MA is executed only
+ /// under condition C then the preload code will look like this:
+ ///
+ /// MA_preload = undef;
+ /// if (C)
+ /// MA_preload = load MA;
+ /// use MA_preload
+ Value *preloadInvariantLoad(const MemoryAccess &MA,
+ __isl_take isl_set *Domain);
+
+ /// Preload the invariant access equivalence class @p IAClass
+ ///
+ /// This function will preload the representing load from @p IAClass and
+ /// map all members of @p IAClass to that preloaded value, potentially cast
+ /// to the required type.
+ ///
+ /// @returns False, iff a problem occurred and the load was not preloaded.
+ bool preloadInvariantEquivClass(InvariantEquivClassTy &IAClass);
+
+ void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
+ void createForSequential(isl::ast_node For, bool MarkParallel);
+
+ /// Create LLVM-IR that executes a for node thread parallel.
+ ///
+ /// @param For The FOR isl_ast_node for which code is generated.
+ void createForParallel(__isl_take isl_ast_node *For);
+
+ /// Create new access functions for modified memory accesses.
+ ///
+ /// In case the access function of one of the memory references in the Stmt
+ /// has been modified, we generate a new isl_ast_expr that reflects the
+ /// newly modified access function and return a map that maps from the
+ /// individual memory references in the statement (identified by their id)
+ /// to these newly generated ast expressions.
+ ///
+ /// @param Stmt The statement for which to (possibly) generate new access
+ /// functions.
+ /// @param Node The ast node corresponding to the statement for us to extract
+ /// the local schedule from.
+ /// @return A new hash table that contains remappings from memory ids to new
+ /// access expressions.
+ __isl_give isl_id_to_ast_expr *
+ createNewAccesses(ScopStmt *Stmt, __isl_keep isl_ast_node *Node);
+
+ /// Generate LLVM-IR that computes the values of the original induction
+ /// variables as a function of the newly generated loop induction variables.
+ ///
+ /// Example:
+ ///
+ /// // Original
+ /// for i
+ /// for j
+ /// S(i)
+ ///
+ /// Schedule: [i,j] -> [i+j, j]
+ ///
+ /// // New
+ /// for c0
+ /// for c1
+ /// S(c0 - c1, c1)
+ ///
+ /// Assuming the original code consists of two loops which are
+ /// transformed according to a schedule [i,j] -> [c0=i+j,c1=j]. The resulting
+ /// ast models the original statement as a call expression where each argument
+ /// is an expression that computes the old induction variables from the new
+ /// ones, ordered such that the first argument computes the value of induction
+ /// variable that was outermost in the original code.
+ ///
+ /// @param Expr The call expression that represents the statement.
+ /// @param Stmt The statement that is called.
+ /// @param LTS The loop to SCEV map in which the mapping from the original
+ /// loop to a SCEV representing the new loop iv is added. This
+ /// mapping does not require an explicit induction variable.
+ /// Instead, we think in terms of an implicit induction variable
+ /// that counts the number of times a loop is executed. For each
+ /// original loop this count, expressed as a function of the new
+ /// induction variables, is added to the LTS map.
+ void createSubstitutions(__isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
+ LoopToScevMapT &LTS);
+ void createSubstitutionsVector(__isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
+ std::vector<LoopToScevMapT> &VLTS,
+ std::vector<Value *> &IVS,
+ __isl_take isl_id *IteratorID);
+ virtual void createIf(__isl_take isl_ast_node *If);
+ void createUserVector(__isl_take isl_ast_node *User,
+ std::vector<Value *> &IVS,
+ __isl_take isl_id *IteratorID,
+ __isl_take isl_union_map *Schedule);
+ virtual void createUser(__isl_take isl_ast_node *User);
+ virtual void createBlock(__isl_take isl_ast_node *Block);
+
+ /// Get the schedule for a given AST node.
+ ///
+ /// This information is used to reason about parallelism of loops or the
+ /// locality of memory accesses under a given schedule.
+ ///
+ /// @param Node The node we want to obtain the schedule for.
+ /// @return Return an isl_union_map that maps from the statements executed
+ /// below this ast node to the scheduling vectors used to enumerate
+ /// them.
+ ///
+ virtual __isl_give isl_union_map *
+ getScheduleForAstNode(__isl_take isl_ast_node *Node);
+
+private:
+ /// Create code for a copy statement.
+ ///
+ /// A copy statement is expected to have one read memory access and one write
+ /// memory access (in this very order). Data is loaded from the location
+ /// described by the read memory access and written to the location described
+ /// by the write memory access. @p NewAccesses contains for each access
+ /// the isl ast expression that describes the location accessed.
+ ///
+ /// @param Stmt The copy statement that contains the accesses.
+ /// @param NewAccesses The hash table that contains remappings from memory
+ /// ids to new access expressions.
+ void generateCopyStmt(ScopStmt *Stmt,
+ __isl_keep isl_id_to_ast_expr *NewAccesses);
+
+ /// Materialize a canonical loop induction variable for `L`, which is a loop
+ /// that is *not* present in the Scop.
+ ///
+ /// Note that this is materialized at the point where the `Builder` is
+ /// currently pointing.
+ /// We also populate the `OutsideLoopIterations` map with `L`s SCEV to keep
+ /// track of the induction variable.
+ /// See [Code generation of induction variables of loops outside Scops]
+ Value *materializeNonScopLoopInductionVariable(const Loop *L);
+};
+
+#endif // POLLY_ISLNODEBUILDER_H
diff --git a/linux-x64/clang/include/polly/CodeGen/LoopGenerators.h b/linux-x64/clang/include/polly/CodeGen/LoopGenerators.h
new file mode 100644
index 0000000..09a0424
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/LoopGenerators.h
@@ -0,0 +1,218 @@
+//===- LoopGenerators.h - IR helper to create loops -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions to create scalar and OpenMP parallel loops
+// as LLVM-IR.
+//
+//===----------------------------------------------------------------------===//
+#ifndef POLLY_LOOP_GENERATORS_H
+#define POLLY_LOOP_GENERATORS_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/Support/ScopHelper.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace polly {
+using namespace llvm;
+
+/// General scheduling types of parallel OpenMP for loops.
+/// Initialization values taken from OpenMP's enum in kmp.h: sched_type.
+/// Currently, only 'static' scheduling may change from chunked to non-chunked.
+enum class OMPGeneralSchedulingType {
+ StaticChunked = 33,
+ StaticNonChunked = 34,
+ Dynamic = 35,
+ Guided = 36,
+ Runtime = 37
+};
+
+extern int PollyNumThreads;
+extern OMPGeneralSchedulingType PollyScheduling;
+extern int PollyChunkSize;
+
+/// Create a scalar do/for-style loop.
+///
+/// @param LowerBound The starting value of the induction variable.
+/// @param UpperBound The upper bound of the induction variable.
+/// @param Stride The value by which the induction variable
+/// is incremented.
+///
+/// @param Builder The builder used to create the loop.
+/// @param LI The loop info for the current function
+/// @param DT The dominator tree we need to update
+/// @param ExitBlock The block the loop will exit to.
+/// Passed as a reference to the block pointer.
+/// @param Predicate The predicate used to generate the upper loop
+/// bound.
+/// @param Annotator This function can (optionally) take
+/// a ScopAnnotator which
+/// annotates loops and alias information in the SCoP.
+/// May be omitted; defaults to a null pointer.
+/// @param Parallel If this loop should be marked parallel in
+/// the Annotator.
+/// @param UseGuard Create a guard in front of the header to check if
+/// the loop is executed at least once, otherwise just
+/// assume it.
+/// @param LoopVectDisabled If the Loop vectorizer should be disabled for this
+/// loop.
+///
+/// @return Value* The newly created induction variable for this loop.
+Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
+ PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT,
+ BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate,
+ ScopAnnotator *Annotator = nullptr, bool Parallel = false,
+ bool UseGuard = true, bool LoopVectDisabled = false);
+
+/// The ParallelLoopGenerator allows one to create parallelized loops
+///
+/// To parallelize a loop, we perform the following steps:
+/// o Generate a subfunction which will hold the loop body.
+/// o Create a struct to hold all outer values needed in the loop body.
+/// o Create calls to a runtime library to achieve the actual parallelism.
+/// These calls will spawn and join threads, define how the work (here the
+/// iterations) is distributed between them and make sure each has access
+/// to the struct holding all needed values.
+///
+/// At the moment we support only one parallel runtime, OpenMP.
+///
+/// If we parallelize the outer loop of the following loop nest,
+///
+/// S0;
+/// for (int i = 0; i < N; i++)
+/// for (int j = 0; j < M; j++)
+/// S1(i, j);
+/// S2;
+///
+/// we will generate the following code (with different runtime function names):
+///
+/// S0;
+/// auto *values = storeValuesIntoStruct();
+/// // Execute subfunction with multiple threads
+/// spawn_threads(subfunction, values);
+/// join_threads();
+/// S2;
+///
+/// // This function is executed in parallel by different threads
+/// void subfunction(values) {
+/// while (auto *WorkItem = getWorkItem()) {
+/// int LB = WorkItem.begin();
+/// int UB = WorkItem.end();
+/// for (int i = LB; i < UB; i++)
+/// for (int j = 0; j < M; j++)
+/// S1(i, j);
+/// }
+/// cleanup_thread();
+/// }
+class ParallelLoopGenerator {
+public:
+ /// Create a parallel loop generator for the current function.
+ ParallelLoopGenerator(PollyIRBuilder &Builder, LoopInfo &LI,
+ DominatorTree &DT, const DataLayout &DL)
+ : Builder(Builder), LI(LI), DT(DT),
+ LongType(
+ Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
+ M(Builder.GetInsertBlock()->getParent()->getParent()) {}
+
+ virtual ~ParallelLoopGenerator() {}
+
+ /// Create a parallel loop.
+ ///
+ /// This function is the main function to automatically generate a parallel
+ /// loop with all its components.
+ ///
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ /// @param Values A set of LLVM-IR Values that should be available in
+ /// the new loop body.
+ /// @param VMap A map to allow outside access to the new versions of
+ /// the values in @p Values.
+ /// @param LoopBody A pointer to an iterator that is set to point to the
+ /// body of the created loop. It should be used to insert
+ /// instructions that form the actual loop body.
+ ///
+ /// @return The newly created induction variable for this loop.
+ Value *createParallelLoop(Value *LB, Value *UB, Value *Stride,
+ SetVector<Value *> &Values, ValueMapT &VMap,
+ BasicBlock::iterator *LoopBody);
+
+protected:
+ /// The IR builder we use to create instructions.
+ PollyIRBuilder &Builder;
+
+ /// The loop info of the current function we need to update.
+ LoopInfo &LI;
+
+ /// The dominance tree of the current function we need to update.
+ DominatorTree &DT;
+
+ /// The type of a "long" on this hardware used for backend calls.
+ Type *LongType;
+
+ /// The current module
+ Module *M;
+
+public:
+ /// Create a struct for all @p Values and store them in there.
+ ///
+ /// @param Values The values which should be stored in the struct.
+ ///
+ /// @return The created struct.
+ AllocaInst *storeValuesIntoStruct(SetVector<Value *> &Values);
+
+ /// Extract all values from the @p Struct and construct the mapping.
+ ///
+ /// @param Values The values which were stored in the struct.
+ /// @param Ty Presumably the type of @p Struct (TODO confirm).
+ /// @param Struct The struct holding all the values in @p Values.
+ /// @param VMap Maps each of @p Values to the value loaded from @p Struct.
+ void extractValuesFromStruct(SetVector<Value *> Values, Type *Ty,
+ Value *Struct, ValueMapT &VMap);
+
+ /// Create the definition of the parallel subfunction.
+ ///
+ /// @return A pointer to the subfunction.
+ Function *createSubFnDefinition();
+
+ /// Create the runtime library calls for spawn and join of the worker threads.
+ /// Additionally, places a call to the specified subfunction.
+ ///
+ /// @param SubFn The subfunction which holds the loop body.
+ /// @param SubFnParam The parameter for the subfunction (basically the struct
+ /// filled with the outside values).
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ virtual void deployParallelExecution(Function *SubFn, Value *SubFnParam,
+ Value *LB, Value *UB, Value *Stride) = 0;
+
+ /// Prepare the definition of the parallel subfunction.
+ /// Creates the argument list and names them (as well as the subfunction).
+ ///
+ /// @param F A pointer to the (parallel) subfunction's parent function.
+ ///
+ /// @return The pointer to the (parallel) subfunction.
+ virtual Function *prepareSubFnDefinition(Function *F) const = 0;
+
+ /// Create the parallel subfunction.
+ ///
+ /// @param Stride The induction variable increment.
+ /// @param Struct A struct holding all values in @p UsedValues.
+ /// @param UsedValues A set of LLVM-IR Values that should be available in
+ /// the new loop body.
+ /// @param VMap A map to allow outside access to the new versions of
+ /// the values in @p UsedValues.
+ ///
+ /// @return A tuple of the newly created induction variable and the newly
+ /// created subfunction.
+ virtual std::tuple<Value *, Function *>
+ createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues,
+ ValueMapT &VMap) = 0;
+};
+} // end namespace polly
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/LoopGeneratorsGOMP.h b/linux-x64/clang/include/polly/CodeGen/LoopGeneratorsGOMP.h
new file mode 100644
index 0000000..b3ff982
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/LoopGeneratorsGOMP.h
@@ -0,0 +1,75 @@
+//===- LoopGeneratorsGOMP.h - IR helper to create loops ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions to create scalar and OpenMP parallel loops
+// as LLVM-IR.
+//
+//===----------------------------------------------------------------------===//
+#ifndef POLLY_LOOP_GENERATORS_GOMP_H
+#define POLLY_LOOP_GENERATORS_GOMP_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/CodeGen/LoopGenerators.h"
+#include "polly/Support/ScopHelper.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace polly {
+using namespace llvm;
+
+/// This ParallelLoopGenerator subclass handles the generation of parallelized
+/// code, utilizing the GNU OpenMP library.
+class ParallelLoopGeneratorGOMP : public ParallelLoopGenerator {
+public:
+ /// Create a parallel loop generator for the current function.
+ ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, LoopInfo &LI,
+ DominatorTree &DT, const DataLayout &DL)
+ : ParallelLoopGenerator(Builder, LI, DT, DL) {}
+
+ // The functions below may be used if one does not want to generate a
+ // specific OpenMP parallel loop, but generate individual parts of it
+ // (e.g. the subfunction definition).
+
+ /// Create a runtime library call to spawn the worker threads.
+ ///
+ /// @param SubFn The subfunction which holds the loop body.
+ /// @param SubFnParam The parameter for the subfunction (basically the struct
+ /// filled with the outside values).
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride);
+
+ void deployParallelExecution(Function *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride) override;
+
+ virtual Function *prepareSubFnDefinition(Function *F) const override;
+
+ std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct,
+ SetVector<Value *> UsedValues,
+ ValueMapT &VMap) override;
+
+ /// Create a runtime library call to join the worker threads.
+ void createCallJoinThreads();
+
+ /// Create a runtime library call to get the next work item.
+ ///
+ /// @param LBPtr A pointer value to store the work item begin in.
+ /// @param UBPtr A pointer value to store the work item end in.
+ ///
+ /// @returns A true value if the work item is not empty.
+ Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr);
+
+ /// Create a runtime library call to allow cleanup of the thread.
+ ///
+ /// @note This function is called right before the thread will exit the
+ /// subfunction and only if the runtime system depends on it.
+ void createCallCleanupThread();
+};
+} // end namespace polly
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/LoopGeneratorsKMP.h b/linux-x64/clang/include/polly/CodeGen/LoopGeneratorsKMP.h
new file mode 100644
index 0000000..470df60
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/LoopGeneratorsKMP.h
@@ -0,0 +1,144 @@
+//===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions to create scalar and OpenMP parallel loops
+// as LLVM-IR.
+//
+//===----------------------------------------------------------------------===//
+#ifndef POLLY_LOOP_GENERATORS_KMP_H
+#define POLLY_LOOP_GENERATORS_KMP_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/CodeGen/LoopGenerators.h"
+#include "polly/Support/ScopHelper.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace polly {
+using namespace llvm;
+
+/// This ParallelLoopGenerator subclass handles the generation of parallelized
+/// code, utilizing the LLVM OpenMP library.
+class ParallelLoopGeneratorKMP : public ParallelLoopGenerator {
+public:
+ /// Create a parallel loop generator for the current function.
+ ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
+ DominatorTree &DT, const DataLayout &DL)
+ : ParallelLoopGenerator(Builder, LI, DT, DL) {
+ SourceLocationInfo = createSourceLocation();
+ }
+
+protected:
+ /// The source location struct of this loop.
+ /// ident_t = type { i32, i32, i32, i32, i8* }
+ GlobalValue *SourceLocationInfo;
+
+ /// Convert the combination of given chunk size and scheduling type (which
+ /// might have been set via the command line) into the corresponding
+ /// scheduling type. This may result (e.g.) in a 'change' from
+ /// "static chunked" scheduling to "static non-chunked" (regarding the
+ /// provided and returned scheduling types).
+ ///
+ /// @param ChunkSize The chunk size, set via command line or its default.
+ /// @param Scheduling The scheduling, set via command line or its default.
+ ///
+ /// @return The corresponding OMPGeneralSchedulingType.
+ OMPGeneralSchedulingType
+ getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const;
+
+ /// Returns True if 'LongType' is 64bit wide, otherwise: False.
+ bool is64BitArch();
+
+public:
+ // The functions below may be used if one does not want to generate a
+ // specific OpenMP parallel loop, but generate individual parts of it
+ // (e.g. the subfunction definition).
+
+ /// Create a runtime library call to spawn the worker threads.
+ ///
+ /// @param SubFn The subfunction which holds the loop body.
+ /// @param SubFnParam The parameter for the subfunction (basically the struct
+ /// filled with the outside values).
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride);
+
+ void deployParallelExecution(Function *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride) override;
+
+ virtual Function *prepareSubFnDefinition(Function *F) const override;
+
+ std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct,
+ SetVector<Value *> UsedValues,
+ ValueMapT &VMap) override;
+
+ /// Create a runtime library call to get the current global thread number.
+ ///
+ /// @return A Value ref which holds the current global thread number.
+ Value *createCallGlobalThreadNum();
+
+ /// Create a runtime library call to request a number of threads, which
+ /// will be used in the next OpenMP section (by the next fork).
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param NumThreads The number of threads to use.
+ void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads);
+
+ /// Create a runtime library call to prepare the OpenMP runtime
+ /// for dynamically scheduled loops, saving the loop arguments.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param LB The loop's lower bound.
+ /// @param UB The loop's upper bound.
+ /// @param Inc The loop increment.
+ /// @param ChunkSize The chunk size of the parallel loop.
+ void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB,
+ Value *Inc, Value *ChunkSize);
+
+ /// Create a runtime library call to retrieve the next (dynamically)
+ /// allocated chunk of work for this thread.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is
+ /// the last chunk of work, or 0 otherwise.
+ /// @param LBPtr Pointer to the lower bound for the next chunk.
+ /// @param UBPtr Pointer to the upper bound for the next chunk.
+ /// @param StridePtr Pointer to the stride for the next chunk.
+ ///
+ /// @return A Value which holds 1 if there is work to be done, 0 otherwise.
+ Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr,
+ Value *LBPtr, Value *UBPtr, Value *StridePtr);
+
+ /// Create a runtime library call to prepare the OpenMP runtime
+ /// for statically scheduled loops, saving the loop arguments.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is
+ /// the last chunk of work, or 0 otherwise.
+ /// @param LBPtr Pointer to the lower bound for the next chunk.
+ /// @param UBPtr Pointer to the upper bound for the next chunk.
+ /// @param StridePtr Pointer to the stride for the next chunk.
+ /// @param ChunkSize The chunk size of the parallel loop.
+ void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr,
+ Value *LBPtr, Value *UBPtr, Value *StridePtr,
+ Value *ChunkSize);
+
+ /// Create a runtime library call to mark the end of
+ /// a statically scheduled loop.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ void createCallStaticFini(Value *GlobalThreadID);
+
+ /// Create the current source location.
+ ///
+ /// TODO: Generates only(!) dummy values.
+ GlobalVariable *createSourceLocation();
+};
+} // end namespace polly
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/PPCGCodeGeneration.h b/linux-x64/clang/include/polly/CodeGen/PPCGCodeGeneration.h
new file mode 100644
index 0000000..d003fb2
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/PPCGCodeGeneration.h
@@ -0,0 +1,27 @@
+//===--- polly/PPCGCodeGeneration.h - Polly Accelerator Code Generation. --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Take a scop created by ScopInfo and map it to GPU code using the ppcg
+// GPU mapping strategy.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_PPCGCODEGENERATION_H
+#define POLLY_PPCGCODEGENERATION_H
+
+/// The GPU architecture to target.
+enum GPUArch { NVPTX64, SPIR32, SPIR64 };
+
+/// The GPU runtime implementation to use.
+enum GPURuntime { CUDA, OpenCL };
+
+namespace polly {
+extern bool PollyManagedMemory;
+}
+
+#endif // POLLY_PPCGCODEGENERATION_H
diff --git a/linux-x64/clang/include/polly/CodeGen/PerfMonitor.h b/linux-x64/clang/include/polly/CodeGen/PerfMonitor.h
new file mode 100644
index 0000000..81aa4c7
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/PerfMonitor.h
@@ -0,0 +1,142 @@
+//===--- PerfMonitor.h --- Monitor time spent in scops --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PERF_MONITOR_H
+#define PERF_MONITOR_H
+
+#include "polly/CodeGen/IRBuilder.h"
+
+namespace polly {
+
+class PerfMonitor {
+public:
+ /// Create a new performance monitor.
+ ///
+ /// @param S The scop for which to generate fine-grained performance
+ /// monitoring information.
+ /// @param M The module for which to generate the performance monitor.
+ PerfMonitor(const Scop &S, llvm::Module *M);
+
+ /// Initialize the performance monitor.
+ ///
+ /// Ensure that all global variables, functions, and callbacks needed to
+ /// manage the performance monitor are initialized and registered.
+ void initialize();
+
+ /// Mark the beginning of a timing region.
+ ///
+ /// @param InsertBefore The instruction before which the timing region starts.
+ void insertRegionStart(llvm::Instruction *InsertBefore);
+
+ /// Mark the end of a timing region.
+ ///
+ /// @param InsertBefore The instruction before which the timing region ends.
+ void insertRegionEnd(llvm::Instruction *InsertBefore);
+
+private:
+ llvm::Module *M;
+ PollyIRBuilder Builder;
+
+ /// The scop to profile against.
+ const Scop &S;
+
+ /// Indicates if performance profiling is supported on this architecture.
+ bool Supported;
+
+ /// The cycle counter at the beginning of the program execution.
+ llvm::Value *CyclesTotalStartPtr;
+
+ /// The total number of cycles spent in the current scop S.
+ llvm::Value *CyclesInCurrentScopPtr;
+
+ /// The total number of times the current scop S is executed.
+ llvm::Value *TripCountForCurrentScopPtr;
+
+ /// The total number of cycles spent within scops.
+ llvm::Value *CyclesInScopsPtr;
+
+ /// The value of the cycle counter at the beginning of the last scop.
+ llvm::Value *CyclesInScopStartPtr;
+
+ /// A global variable, that keeps track if the performance monitor
+ /// initialization has already been run.
+ llvm::Value *AlreadyInitializedPtr;
+
+ llvm::Function *insertInitFunction(llvm::Function *FinalReporting);
+
+ /// Add Function @p Fn to the list of global constructors.
+ ///
+ /// If no global constructors are available in this current module, insert
+ /// a new list of global constructors containing @p Fn as only global
+ /// constructor. Otherwise, append @p Fn to the list of global constructors.
+ ///
+ /// All functions listed as global constructors are executed before the
+ /// main() function is called.
+ ///
+ /// @param Fn Function to add to global constructors.
+ void addToGlobalConstructors(llvm::Function *Fn);
+
+ /// Add global variables to module.
+ ///
+ /// Insert a set of global variables that are used to track performance,
+ /// into the module (or obtain references to them if they already exist).
+ void addGlobalVariables();
+
+ /// Add per-scop tracking to module.
+ ///
+ /// Insert the global variable which is used to track the number of cycles
+ /// this scop runs.
+ void addScopCounter();
+
+ /// Get a reference to the intrinsic "{ i64, i32 } @llvm.x86.rdtscp()".
+ ///
+ /// The rdtscp function returns the current value of the processor's
+ /// time-stamp counter as well as the current CPU identifier. On modern x86
+ /// systems, the returned value is independent of the dynamic clock frequency
+ /// and consistent across multiple cores. It can consequently be used to get
+ /// accurate and low-overhead timing information. Even though the counter is
+ /// wrapping, it can be reliably used even for measuring longer time
+ /// intervals, as on a 1 GHz processor the counter only wraps every 584 years.
+ ///
+ /// The RDTSCP instruction is "pseudo" serializing:
+ ///
+ /// "The RDTSCP instruction waits until all previous instructions have been
+ /// executed before reading the counter. However, subsequent instructions may
+ /// begin execution before the read operation is performed."
+ ///
+ /// To ensure that no later instructions are scheduled before the RDTSCP
+ /// instruction it is often recommended to schedule a cpuid call after the
+ /// RDTSCP instruction. We do not do this yet, trading some imprecision in
+ /// our timing for a reduced overhead in our timing.
+ ///
+ /// @returns A reference to the declaration of @llvm.x86.rdtscp.
+ llvm::Function *getRDTSCP();
+
+ /// Get a reference to "int atexit(void (*function)(void))" function.
+ ///
+ /// This function allows registering function pointers that must be executed
+ /// when the program is terminated.
+ ///
+ /// @returns A reference to @atexit().
+ llvm::Function *getAtExit();
+
+ /// Create function "__polly_perf_final_reporting".
+ ///
+ /// This function finalizes the performance measurements and prints the
+ /// results to stdout. It is expected to be registered with 'atexit()'.
+ llvm::Function *insertFinalReporting();
+
+ /// Append Scop reporting data to "__polly_perf_final_reporting".
+ ///
+ /// This function appends the current scop (S)'s information to the final
+ /// printing function.
+ void AppendScopReporting();
+};
+} // namespace polly
+
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/RuntimeDebugBuilder.h b/linux-x64/clang/include/polly/CodeGen/RuntimeDebugBuilder.h
new file mode 100644
index 0000000..c40b53c
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/RuntimeDebugBuilder.h
@@ -0,0 +1,169 @@
+//===--- RuntimeDebugBuilder.h --- Helper to insert prints into LLVM-IR ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef RUNTIME_DEBUG_BUILDER_H
+#define RUNTIME_DEBUG_BUILDER_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
+namespace llvm {
+class Value;
+class Function;
+} // namespace llvm
+
+namespace polly {
+
+/// Insert function calls that print certain LLVM values at run time.
+///
+/// This class inserts libc function calls to print certain LLVM values at
+/// run time.
+struct RuntimeDebugBuilder {
+
+ /// Generate a constant string into the builder's llvm::Module which can be
+ /// passed to createCPUPrinter() or createGPUPrinter().
+ ///
+ /// @param Builder The builder used to emit the printer calls.
+ /// @param Str The string to be printed.
+ ///
+ /// @return A global containing @p Str.
+ static llvm::Value *getPrintableString(PollyIRBuilder &Builder,
+ llvm::StringRef Str) {
+ // TODO: Get rid of magic number 4. It is NVPTX's constant address space and
+ // works on X86 (CPU) only because its backend ignores the address space.
+ return Builder.CreateGlobalStringPtr(Str, "", 4);
+ }
+
+ /// Return whether an llvm::Value of the type @p Ty is printable for
+ /// debugging.
+ ///
+ /// That is, whether such a value can be passed to createCPUPrinter() or
+ /// createGPUPrinter() to be dumped at run time. If false is returned, those
+ /// functions will fail.
+ static bool isPrintable(llvm::Type *Ty);
+
+ /// Print a set of LLVM-IR Values or StringRefs via printf.
+ ///
+ /// This function emits a call to printf that will print the given arguments.
+ /// It is useful for debugging CPU programs. All arguments given in this list
+ /// will be automatically concatenated and the resulting string will be
+ /// printed atomically. We also support ArrayRef arguments, which can be used
+ /// to provide for example a list of id values.
+ ///
+ /// @param Builder The builder used to emit the printer calls.
+ /// @param args The list of values to print.
+ template <typename... Args>
+ static void createCPUPrinter(PollyIRBuilder &Builder, Args... args) {
+ std::vector<llvm::Value *> Vector;
+ createPrinter(Builder, /* CPU */ false, Vector, args...);
+ }
+
+ /// Print a set of LLVM-IR Values or StringRefs on an NVIDIA GPU.
+ ///
+ /// This function emits a call to vprintf that will print the given
+ /// arguments from within a kernel thread. It is useful for debugging
+ /// CUDA program kernels. All arguments given in this list will be
+ /// automatically concatenated and the resulting string will be printed
+ /// atomically. We also support ArrayRef arguments, which can be used to
+ /// provide for example a list of thread-id values.
+ ///
+ /// @param Builder The builder used to emit the printer calls.
+ /// @param args The list of values to print.
+ template <typename... Args>
+ static void createGPUPrinter(PollyIRBuilder &Builder, Args... args) {
+ std::vector<llvm::Value *> Vector;
+ createPrinter(Builder, /* GPU */ true, Vector, args...);
+ }
+
+private:
+ /// Handle Values.
+ template <typename... Args>
+ static void createPrinter(PollyIRBuilder &Builder, bool UseGPU,
+ std::vector<llvm::Value *> &Values,
+ llvm::Value *Value, Args... args) {
+ Values.push_back(Value);
+ createPrinter(Builder, UseGPU, Values, args...);
+ }
+
+ /// Handle StringRefs.
+ template <typename... Args>
+ static void createPrinter(PollyIRBuilder &Builder, bool UseGPU,
+ std::vector<llvm::Value *> &Values,
+ llvm::StringRef String, Args... args) {
+ Values.push_back(getPrintableString(Builder, String));
+ createPrinter(Builder, UseGPU, Values, args...);
+ }
+
+ /// Handle ArrayRefs.
+ template <typename... Args>
+ static void createPrinter(PollyIRBuilder &Builder, bool UseGPU,
+ std::vector<llvm::Value *> &Values,
+ llvm::ArrayRef<llvm::Value *> Array, Args... args) {
+ Values.insert(Values.end(), Array.begin(), Array.end());
+ createPrinter(Builder, UseGPU, Values, args...);
+ }
+
+ /// Print a list of Values.
+ static void createPrinter(PollyIRBuilder &Builder, bool UseGPU,
+ llvm::ArrayRef<llvm::Value *> Values);
+
+ /// Print a list of Values on a GPU.
+ static void createGPUPrinterT(PollyIRBuilder &Builder,
+ llvm::ArrayRef<llvm::Value *> Values);
+
+ /// Print a list of Values on a CPU.
+ static void createCPUPrinterT(PollyIRBuilder &Builder,
+ llvm::ArrayRef<llvm::Value *> Values);
+
+ /// Get a reference to the 'printf' function.
+ ///
+ /// If the current module does not yet contain a reference to printf, we
+ /// insert a reference to it. Otherwise the existing reference is returned.
+ static llvm::Function *getPrintF(PollyIRBuilder &Builder);
+
+ /// Call printf
+ ///
+ /// @param Builder The builder used to insert the code.
+ /// @param Format The format string.
+ /// @param Values The set of values to print.
+ static void createPrintF(PollyIRBuilder &Builder, std::string Format,
+ llvm::ArrayRef<llvm::Value *> Values);
+
+ /// Get (and possibly insert) a vprintf declaration into the module.
+ static llvm::Function *getVPrintF(PollyIRBuilder &Builder);
+
+ /// Call fflush
+ ///
+ /// @param Builder The builder used to insert the code.
+ static void createFlush(PollyIRBuilder &Builder);
+
+ /// Get (and possibly insert) a NVIDIA address space cast call.
+ static llvm::Function *getAddressSpaceCast(PollyIRBuilder &Builder,
+ unsigned Src, unsigned Dst,
+ unsigned SrcBits = 8,
+ unsigned DstBits = 8);
+
+ /// Get identifiers that describe the currently executed GPU thread.
+ ///
+ /// The result will be a vector that, if passed to the GPU printer, will
+ /// result in a string (initialized to values corresponding to the printing
+ /// thread):
+ ///
+ /// "> block-id: bidx bidy bidz | thread-id: tidx tidy tidz "
+ static std::vector<llvm::Value *>
+ getGPUThreadIdentifiers(PollyIRBuilder &Builder);
+};
+} // namespace polly
+
+extern bool PollyDebugPrinting;
+
+#endif
diff --git a/linux-x64/clang/include/polly/CodeGen/Utils.h b/linux-x64/clang/include/polly/CodeGen/Utils.h
new file mode 100644
index 0000000..0678e34
--- /dev/null
+++ b/linux-x64/clang/include/polly/CodeGen/Utils.h
@@ -0,0 +1,72 @@
+//===- Utils.h - Utility functions for code generation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for the code generation.
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_CODEGEN_UTILS_H
+#define POLLY_CODEGEN_UTILS_H
+
+#include <utility>
+
+namespace llvm {
+class Pass;
+class Value;
+class BasicBlock;
+class DominatorTree;
+class RegionInfo;
+class LoopInfo;
+class BranchInst;
+} // namespace llvm
+
+namespace polly {
+
+class Scop;
+
+using BBPair = std::pair<llvm::BasicBlock *, llvm::BasicBlock *>;
+/// Execute a Scop conditionally wrt @p RTC.
+///
+/// In the CFG the optimized code of the Scop is generated next to the
+/// original code. Both the new and the original version of the code remain
+/// in the CFG. A branch statement decides which version is executed based on
+/// the runtime value of @p RTC.
+///
+/// Before transformation:
+///
+/// bb0
+/// |
+/// orig_scop
+/// |
+/// bb1
+///
+/// After transformation:
+/// bb0
+/// |
+/// polly.splitBlock
+/// / \.
+/// | startBlock
+/// | |
+/// orig_scop new_scop
+/// \ /
+/// \ /
+/// bb1 (joinBlock)
+///
+/// @param S The Scop to execute conditionally.
+/// @param RTC The runtime condition checked before executing the new SCoP.
+/// @param DT,RI,LI The DominatorTree, RegionInfo and LoopInfo analyses.
+///
+/// @return A std::pair:
+/// - The first element is a BBPair of (StartBlock, EndBlock).
+/// - The second element is the BranchInst which conditionally
+/// branches to the SCoP based on the RTC.
+///
+std::pair<BBPair, llvm::BranchInst *>
+executeScopConditionally(Scop &S, llvm::Value *RTC, llvm::DominatorTree &DT,
+ llvm::RegionInfo &RI, llvm::LoopInfo &LI);
+} // namespace polly
+#endif