blob: f2c52c8fedcf96e08c1703800398774034a64a91 [file] [log] [blame]
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001//===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the BlockGenerator and VectorBlockGenerator classes, which
10// generate sequential code and vectorized code for a polyhedral statement,
11// respectively.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef POLLY_BLOCK_GENERATORS_H
16#define POLLY_BLOCK_GENERATORS_H
17
18#include "polly/CodeGen/IRBuilder.h"
19#include "polly/Support/ScopHelper.h"
20#include "llvm/Analysis/ScalarEvolutionExpressions.h"
21#include "isl/isl-noexceptions.h"
22
23namespace polly {
24using namespace llvm;
25class MemoryAccess;
26class ScopArrayInfo;
27class IslExprBuilder;
28
29/// Generate a new basic block for a polyhedral statement.
30class BlockGenerator {
31public:
32 typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;
33
34 /// Map types to resolve scalar dependences.
35 ///
36 ///@{
37 using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>;
38
39 /// Simple vector of instructions to store escape users.
40 using EscapeUserVectorTy = SmallVector<Instruction *, 4>;
41
42 /// Map type to resolve escaping users for scalar instructions.
43 ///
44 /// @see The EscapeMap member.
45 using EscapeUsersAllocaMapTy =
46 MapVector<Instruction *,
47 std::pair<AssertingVH<Value>, EscapeUserVectorTy>>;
48
49 ///@}
50
51 /// Create a generator for basic blocks.
52 ///
53 /// @param Builder The LLVM-IR Builder used to generate the statement. The
54 /// code is generated at the location, the Builder points
55 /// to.
56 /// @param LI The loop info for the current function
57 /// @param SE The scalar evolution info for the current function
58 /// @param DT The dominator tree of this function.
59 /// @param ScalarMap Map from scalars to their demoted location.
60 /// @param EscapeMap Map from scalars to their escape users and locations.
61 /// @param GlobalMap A mapping from llvm::Values used in the original scop
62 /// region to a new set of llvm::Values. Each reference to
63 /// an original value appearing in this mapping is replaced
64 /// with the new value it is mapped to.
65 /// @param ExprBuilder An expression builder to generate new access functions.
66 /// @param StartBlock The first basic block after the RTC.
67 BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
68 DominatorTree &DT, AllocaMapTy &ScalarMap,
69 EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
70 IslExprBuilder *ExprBuilder, BasicBlock *StartBlock);
71
72 /// Copy the basic block.
73 ///
74 /// This copies the entire basic block and updates references to old values
75 /// with references to new values, as defined by GlobalMap.
76 ///
77 /// @param Stmt The block statement to code generate.
78 /// @param LTS A map from old loops to new induction variables as
79 /// SCEVs.
80 /// @param NewAccesses A map from memory access ids to new ast expressions,
81 /// which may contain new access expressions for certain
82 /// memory accesses.
83 void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
84 isl_id_to_ast_expr *NewAccesses);
85
86 /// Remove a ScopArrayInfo's allocation from the ScalarMap.
87 ///
88 /// This function allows to remove values from the ScalarMap. This is useful
89 /// if the corresponding alloca instruction will be deleted (or moved into
90 /// another module), as without removing these values the underlying
91 /// AssertingVH will trigger due to us still keeping reference to this
92 /// scalar.
93 ///
94 /// @param Array The array for which the alloca was generated.
95 void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); }
96
97 /// Return the alloca for @p Access.
98 ///
99 /// If no alloca was mapped for @p Access a new one is created.
100 ///
101 /// @param Access The memory access for which to generate the alloca.
102 ///
103 /// @returns The alloca for @p Access or a replacement value taken from
104 /// GlobalMap.
105 Value *getOrCreateAlloca(const MemoryAccess &Access);
106
107 /// Return the alloca for @p Array.
108 ///
109 /// If no alloca was mapped for @p Array a new one is created.
110 ///
111 /// @param Array The array for which to generate the alloca.
112 ///
113 /// @returns The alloca for @p Array or a replacement value taken from
114 /// GlobalMap.
115 Value *getOrCreateAlloca(const ScopArrayInfo *Array);
116
117 /// Finalize the code generation for the SCoP @p S.
118 ///
119 /// This will initialize and finalize the scalar variables we demoted during
120 /// the code generation.
121 ///
122 /// @see createScalarInitialization(Scop &)
123 /// @see createScalarFinalization(Region &)
124 void finalizeSCoP(Scop &S);
125
126 /// An empty destructor
127 virtual ~BlockGenerator() {}
128
129 BlockGenerator(const BlockGenerator &) = default;
130
131protected:
132 PollyIRBuilder &Builder;
133 LoopInfo &LI;
134 ScalarEvolution &SE;
135 IslExprBuilder *ExprBuilder;
136
137 /// The dominator tree of this function.
138 DominatorTree &DT;
139
140 /// The entry block of the current function.
141 BasicBlock *EntryBB;
142
143 /// Map to resolve scalar dependences for PHI operands and scalars.
144 ///
145 /// When translating code that contains scalar dependences as they result from
146 /// inter-block scalar dependences (including the use of data carrying PHI
147 /// nodes), we do not directly regenerate in-register SSA code, but instead
148 /// allocate some stack memory through which these scalar values are passed.
149 /// Only a later pass of -mem2reg will then (re)introduce in-register
150 /// computations.
151 ///
152 /// To keep track of the memory location(s) used to store the data computed by
153 /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a
154 /// given ScopArrayInfo to the junk of stack allocated memory, that is
155 /// used for code generation.
156 ///
157 /// Up to two different ScopArrayInfo objects are associated with each
158 /// llvm::Value:
159 ///
160 /// MemoryType::Value objects are used for normal scalar dependences that go
161 /// from a scalar definition to its use. Such dependences are lowered by
162 /// directly writing the value an instruction computes into the corresponding
163 /// chunk of memory and reading it back from this chunk of memory right before
164 /// every use of this original scalar value. The memory allocations for
165 /// MemoryType::Value objects end with '.s2a'.
166 ///
167 /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI
168 /// nodes. For each PHI nodes we introduce, besides the Array of type
169 /// MemoryType::Value, a second chunk of memory into which we write at the end
170 /// of each basic block preceding the PHI instruction the value passed
171 /// through this basic block. At the place where the PHI node is executed, we
172 /// replace the PHI node with a load from the corresponding MemoryType::PHI
173 /// memory location. The memory allocations for MemoryType::PHI end with
174 /// '.phiops'.
175 ///
176 /// Example:
177 ///
178 /// Input C Code
179 /// ============
180 ///
181 /// S1: x1 = ...
182 /// for (i=0...N) {
183 /// S2: x2 = phi(x1, add)
184 /// S3: add = x2 + 42;
185 /// }
186 /// S4: print(x1)
187 /// print(x2)
188 /// print(add)
189 ///
190 ///
191 /// Unmodified IR IR After expansion
192 /// ============= ==================
193 ///
194 /// S1: x1 = ... S1: x1 = ...
195 /// x1.s2a = s1
196 /// x2.phiops = s1
197 /// | |
198 /// | <--<--<--<--< | <--<--<--<--<
199 /// | / \ | / \ .
200 /// V V \ V V \ .
201 /// S2: x2 = phi (x1, add) | S2: x2 = x2.phiops |
202 /// | x2.s2a = x2 |
203 /// | |
204 /// S3: add = x2 + 42 | S3: add = x2 + 42 |
205 /// | add.s2a = add |
206 /// | x2.phiops = add |
207 /// | \ / | \ /
208 /// | \ / | \ /
209 /// | >-->-->-->--> | >-->-->-->-->
210 /// V V
211 ///
212 /// S4: x1 = x1.s2a
213 /// S4: ... = x1 ... = x1
214 /// x2 = x2.s2a
215 /// ... = x2 ... = x2
216 /// add = add.s2a
217 /// ... = add ... = add
218 ///
219 /// ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a,
220 /// add:Value -> add.s2a, x2:PHI -> x2.phiops }
221 ///
222 /// ??? Why does a PHI-node require two memory chunks ???
223 ///
224 /// One may wonder why a PHI node requires two memory chunks and not just
225 /// all data is stored in a single location. The following example tries
226 /// to store all data in .s2a and drops the .phiops location:
227 ///
228 /// S1: x1 = ...
229 /// x1.s2a = s1
230 /// x2.s2a = s1 // use .s2a instead of .phiops
231 /// |
232 /// | <--<--<--<--<
233 /// | / \ .
234 /// V V \ .
235 /// S2: x2 = x2.s2a | // value is same as above, but read
236 /// | // from .s2a
237 /// |
238 /// x2.s2a = x2 | // store into .s2a as normal
239 /// |
240 /// S3: add = x2 + 42 |
241 /// add.s2a = add |
242 /// x2.s2a = add | // use s2a instead of .phiops
243 /// | \ / // !!! This is wrong, as x2.s2a now
244 /// | >-->-->-->--> // contains add instead of x2.
245 /// V
246 ///
247 /// S4: x1 = x1.s2a
248 /// ... = x1
249 /// x2 = x2.s2a // !!! We now read 'add' instead of
250 /// ... = x2 // 'x2'
251 /// add = add.s2a
252 /// ... = add
253 ///
254 /// As visible in the example, the SSA value of the PHI node may still be
255 /// needed _after_ the basic block, which could conceptually branch to the
256 /// PHI node, has been run and has overwritten the PHI's old value. Hence, a
257 /// single memory location is not enough to code-generate a PHI node.
258 ///
259 /// Memory locations used for the special PHI node modeling.
260 AllocaMapTy &ScalarMap;
261
262 /// Map from instructions to their escape users as well as the alloca.
263 EscapeUsersAllocaMapTy &EscapeMap;
264
265 /// A map from llvm::Values referenced in the old code to a new set of
266 /// llvm::Values, which is used to replace these old values during
267 /// code generation.
268 ValueMapT &GlobalMap;
269
270 /// The first basic block after the RTC.
271 BasicBlock *StartBlock;
272
273 /// Split @p BB to create a new one we can use to clone @p BB in.
274 BasicBlock *splitBB(BasicBlock *BB);
275
276 /// Copy the given basic block.
277 ///
278 /// @param Stmt The statement to code generate.
279 /// @param BB The basic block to code generate.
280 /// @param BBMap A mapping from old values to their new values in this
281 /// block.
282 /// @param LTS A map from old loops to new induction variables as
283 /// SCEVs.
284 /// @param NewAccesses A map from memory access ids to new ast expressions,
285 /// which may contain new access expressions for certain
286 /// memory accesses.
287 ///
288 /// @returns The copy of the basic block.
289 BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap,
290 LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
291
292 /// Copy the given basic block.
293 ///
294 /// @param Stmt The statement to code generate.
295 /// @param BB The basic block to code generate.
296 /// @param BBCopy The new basic block to generate code in.
297 /// @param BBMap A mapping from old values to their new values in this
298 /// block.
299 /// @param LTS A map from old loops to new induction variables as
300 /// SCEVs.
301 /// @param NewAccesses A map from memory access ids to new ast expressions,
302 /// which may contain new access expressions for certain
303 /// memory accesses.
304 void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
305 ValueMapT &BBMap, LoopToScevMapT &LTS,
306 isl_id_to_ast_expr *NewAccesses);
307
308 /// Generate reload of scalars demoted to memory and needed by @p Stmt.
309 ///
310 /// @param Stmt The statement we generate code for.
311 /// @param LTS A mapping from loops virtual canonical induction
312 /// variable to their new values.
313 /// @param BBMap A mapping from old values to their new values in this block.
314 /// @param NewAccesses A map from memory access ids to new ast expressions.
315 void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT &LTS,
316 ValueMapT &BBMap,
317 __isl_keep isl_id_to_ast_expr *NewAccesses);
318
319 /// When statement tracing is enabled, build the print instructions for
320 /// printing the current statement instance.
321 ///
322 /// The printed output looks like:
323 ///
324 /// Stmt1(0)
325 ///
326 /// If printing of scalars is enabled, it also appends the value of each
327 /// scalar to the line:
328 ///
329 /// Stmt1(0) %i=1 %sum=5
330 ///
331 /// @param Stmt The statement we generate code for.
332 /// @param LTS A mapping from loops virtual canonical induction
333 /// variable to their new values.
334 /// @param BBMap A mapping from old values to their new values in this block.
335 void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
336 ValueMapT &BBMap);
337
338 /// Generate instructions that compute whether one instance of @p Set is
339 /// executed.
340 ///
341 /// @param Stmt The statement we generate code for.
342 /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in
343 /// @p Stmt's domain are ignored.
344 ///
345 /// @return An expression of type i1, generated into the current builder
346 /// position, that evaluates to 1 if the executed instance is part of
347 /// @p Set.
348 Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain);
349
350 /// Generate code that executes in a subset of @p Stmt's domain.
351 ///
352 /// @param Stmt The statement we generate code for.
353 /// @param Subdomain The condition for some code to be executed.
354 /// @param Subject A name for the code that is executed
355 /// conditionally. Used to name new basic blocks and
356 /// instructions.
357 /// @param GenThenFunc Callback which generates the code to be executed
358 /// when the current executed instance is in @p Set. The
359 /// IRBuilder's position is moved to within the block that
360 /// executes conditionally for this callback.
361 void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain,
362 StringRef Subject,
363 const std::function<void()> &GenThenFunc);
364
365 /// Generate the scalar stores for the given statement.
366 ///
367 /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
368 /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
369 /// be demoted to memory.
370 ///
371 /// @param Stmt The statement we generate code for.
372 /// @param LTS A mapping from loops virtual canonical induction
373 /// variable to their new values
374 /// (for values recalculated in the new ScoP, but not
375 /// within this basic block)
376 /// @param BBMap A mapping from old values to their new values in this block.
377 /// @param NewAccesses A map from memory access ids to new ast expressions.
378 virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
379 ValueMapT &BBMap,
380 __isl_keep isl_id_to_ast_expr *NewAccesses);
381
382 /// Handle users of @p Array outside the SCoP.
383 ///
384 /// @param S The current SCoP.
385 /// @param Inst The ScopArrayInfo to handle.
386 void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array);
387
388 /// Find scalar statements that have outside users.
389 ///
390 /// We register these scalar values to later update subsequent scalar uses of
391 /// these values to either use the newly computed value from within the scop
392 /// (if the scop was executed) or the unchanged original code (if the run-time
393 /// check failed).
394 ///
395 /// @param S The scop for which to find the outside users.
396 void findOutsideUsers(Scop &S);
397
398 /// Initialize the memory of demoted scalars.
399 ///
400 /// @param S The scop for which to generate the scalar initializers.
401 void createScalarInitialization(Scop &S);
402
403 /// Create exit PHI node merges for PHI nodes with more than two edges
404 /// from inside the scop.
405 ///
406 /// For scops which have a PHI node in the exit block that has more than two
407 /// incoming edges from inside the scop region, we require some special
408 /// handling to understand which of the possible values will be passed to the
409 /// PHI node from inside the optimized version of the scop. To do so ScopInfo
410 /// models the possible incoming values as write accesses of the ScopStmts.
411 ///
412 /// This function creates corresponding code to reload the computed outgoing
413 /// value from the stack slot it has been stored into and to pass it on to the
414 /// PHI node in the original exit block.
415 ///
416 /// @param S The scop for which to generate the exiting PHI nodes.
417 void createExitPHINodeMerges(Scop &S);
418
419 /// Promote the values of demoted scalars after the SCoP.
420 ///
421 /// If a scalar value was used outside the SCoP we need to promote the value
422 /// stored in the memory cell allocated for that scalar and combine it with
423 /// the original value in the non-optimized SCoP.
424 void createScalarFinalization(Scop &S);
425
426 /// Try to synthesize a new value
427 ///
428 /// Given an old value, we try to synthesize it in a new context from its
429 /// original SCEV expression. We start from the original SCEV expression,
430 /// then replace outdated parameter and loop references, and finally
431 /// expand it to code that computes this updated expression.
432 ///
433 /// @param Stmt The statement to code generate
434 /// @param Old The old Value
435 /// @param BBMap A mapping from old values to their new values
436 /// (for values recalculated within this basic block)
437 /// @param LTS A mapping from loops virtual canonical induction
438 /// variable to their new values
439 /// (for values recalculated in the new ScoP, but not
440 /// within this basic block)
441 /// @param L The loop that surrounded the instruction that referenced
442 /// this value in the original code. This loop is used to
443 /// evaluate the scalar evolution at the right scope.
444 ///
445 /// @returns o A newly synthesized value.
446 /// o NULL, if synthesizing the value failed.
447 Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
448 LoopToScevMapT &LTS, Loop *L) const;
449
450 /// Get the new version of a value.
451 ///
452 /// Given an old value, we first check if a new version of this value is
453 /// available in the BBMap or GlobalMap. In case it is not and the value can
454 /// be recomputed using SCEV, we do so. If we can not recompute a value
455 /// using SCEV, but we understand that the value is constant within the scop,
456 /// we return the old value. If the value can still not be derived, this
457 /// function will assert.
458 ///
459 /// @param Stmt The statement to code generate.
460 /// @param Old The old Value.
461 /// @param BBMap A mapping from old values to their new values
462 /// (for values recalculated within this basic block).
463 /// @param LTS A mapping from loops virtual canonical induction
464 /// variable to their new values
465 /// (for values recalculated in the new ScoP, but not
466 /// within this basic block).
467 /// @param L The loop that surrounded the instruction that referenced
468 /// this value in the original code. This loop is used to
469 /// evaluate the scalar evolution at the right scope.
470 ///
471 /// @returns o The old value, if it is still valid.
472 /// o The new value, if available.
473 /// o NULL, if no value is found.
474 Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
475 LoopToScevMapT &LTS, Loop *L) const;
476
477 void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
478 LoopToScevMapT &LTS);
479
480 /// Get the innermost loop that surrounds the statement @p Stmt.
481 Loop *getLoopForStmt(const ScopStmt &Stmt) const;
482
483 /// Generate the operand address
484 /// @param NewAccesses A map from memory access ids to new ast expressions,
485 /// which may contain new access expressions for certain
486 /// memory accesses.
487 Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
488 ValueMapT &BBMap, LoopToScevMapT &LTS,
489 isl_id_to_ast_expr *NewAccesses);
490
491 /// Generate the operand address.
492 ///
493 /// @param Stmt The statement to generate code for.
494 /// @param L The innermost loop that surrounds the statement.
495 /// @param Pointer If the access expression is not changed (ie. not found
496 /// in @p LTS), use this Pointer from the original code
497 /// instead.
498 /// @param BBMap A mapping from old values to their new values.
499 /// @param LTS A mapping from loops virtual canonical induction
500 /// variable to their new values.
501 /// @param NewAccesses Ahead-of-time generated access expressions.
502 /// @param Id Identifier of the MemoryAccess to generate.
503 /// @param ExpectedType The type the returned value should have.
504 ///
505 /// @return The generated address.
506 Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer,
507 ValueMapT &BBMap, LoopToScevMapT &LTS,
508 isl_id_to_ast_expr *NewAccesses,
509 __isl_take isl_id *Id, Type *ExpectedType);
510
511 /// Generate the pointer value that is accesses by @p Access.
512 ///
513 /// For write accesses, generate the target address. For read accesses,
514 /// generate the source address.
515 /// The access can be either an array access or a scalar access. In the first
516 /// case, the returned address will point to an element into that array. In
517 /// the scalar case, an alloca is used.
518 /// If a new AccessRelation is set for the MemoryAccess, the new relation will
519 /// be used.
520 ///
521 /// @param Access The access to generate a pointer for.
522 /// @param L The innermost loop that surrounds the statement.
523 /// @param LTS A mapping from loops virtual canonical induction
524 /// variable to their new values.
525 /// @param BBMap A mapping from old values to their new values.
526 /// @param NewAccesses A map from memory access ids to new ast expressions.
527 ///
528 /// @return The generated address.
529 Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT &LTS,
530 ValueMapT &BBMap,
531 __isl_keep isl_id_to_ast_expr *NewAccesses);
532
533 /// @param NewAccesses A map from memory access ids to new ast expressions,
534 /// which may contain new access expressions for certain
535 /// memory accesses.
536 Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap,
537 LoopToScevMapT &LTS,
538 isl_id_to_ast_expr *NewAccesses);
539
540 /// @param NewAccesses A map from memory access ids to new ast expressions,
541 /// which may contain new access expressions for certain
542 /// memory accesses.
543 void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap,
544 LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
545
546 /// Copy a single PHI instruction.
547 ///
548 /// The implementation in the BlockGenerator is trivial, however it allows
549 /// subclasses to handle PHIs different.
550 virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &,
551 LoopToScevMapT &) {}
552
553 /// Copy a single Instruction.
554 ///
555 /// This copies a single Instruction and updates references to old values
556 /// with references to new values, as defined by GlobalMap and BBMap.
557 ///
558 /// @param Stmt The statement to code generate.
559 /// @param Inst The instruction to copy.
560 /// @param BBMap A mapping from old values to their new values
561 /// (for values recalculated within this basic block).
562 /// @param GlobalMap A mapping from old values to their new values
563 /// (for values recalculated in the new ScoP, but not
564 /// within this basic block).
565 /// @param LTS A mapping from loops virtual canonical induction
566 /// variable to their new values
567 /// (for values recalculated in the new ScoP, but not
568 /// within this basic block).
569 /// @param NewAccesses A map from memory access ids to new ast expressions,
570 /// which may contain new access expressions for certain
571 /// memory accesses.
572 void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
573 LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
574
575 /// Helper to determine if @p Inst can be synthesized in @p Stmt.
576 ///
577 /// @returns false, iff @p Inst can be synthesized in @p Stmt.
578 bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst);
579
580 /// Remove dead instructions generated for BB
581 ///
582 /// @param BB The basic block code for which code has been generated.
583 /// @param BBMap A local map from old to new instructions.
584 void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap);
585
586 /// Invalidate the scalar evolution expressions for a scop.
587 ///
588 /// This function invalidates the scalar evolution results for all
589 /// instructions that are part of a given scop, and the loops
590 /// surrounding the users of merge blocks. This is necessary to ensure that
591 /// later scops do not obtain scalar evolution expressions that reference
592 /// values that earlier dominated the later scop, but have been moved in the
593 /// conditional part of an earlier scop and consequently do not any more
594 /// dominate the later scop.
595 ///
596 /// @param S The scop to invalidate.
597 void invalidateScalarEvolution(Scop &S);
598};
599
600/// Generate a new vector basic block for a polyhedral statement.
601///
602/// The only public function exposed is generate().
603class VectorBlockGenerator : BlockGenerator {
604public:
605 /// Generate a new vector basic block for a ScoPStmt.
606 ///
607 /// This code generation is similar to the normal, scalar code generation,
608 /// except that each instruction is code generated for several vector lanes
609 /// at a time. If possible instructions are issued as actual vector
610 /// instructions, but e.g. for address calculation instructions we currently
611 /// generate scalar instructions for each vector lane.
612 ///
613 /// @param BlockGen A block generator object used as parent.
614 /// @param Stmt The statement to code generate.
615 /// @param VLTS A mapping from loops virtual canonical induction
616 /// variable to their new values
617 /// (for values recalculated in the new ScoP, but not
618 /// within this basic block), one for each lane.
619 /// @param Schedule A map from the statement to a schedule where the
620 /// innermost dimension is the dimension of the innermost
621 /// loop containing the statement.
622 /// @param NewAccesses A map from memory access ids to new ast expressions,
623 /// which may contain new access expressions for certain
624 /// memory accesses.
625 static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
626 std::vector<LoopToScevMapT> &VLTS,
627 __isl_keep isl_map *Schedule,
628 __isl_keep isl_id_to_ast_expr *NewAccesses) {
629 VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
630 Generator.copyStmt(Stmt, NewAccesses);
631 }
632
633private:
634 // This is a vector of loop->scev maps. The first map is used for the first
635 // vector lane, ...
636 // Each map, contains information about Instructions in the old ScoP, which
637 // are recalculated in the new SCoP. When copying the basic block, we replace
638 // all references to the old instructions with their recalculated values.
639 //
640 // For example, when the code generator produces this AST:
641 //
642 // for (int c1 = 0; c1 <= 1023; c1 += 1)
643 // for (int c2 = 0; c2 <= 1023; c2 += VF)
644 // for (int lane = 0; lane <= VF; lane += 1)
645 // Stmt(c2 + lane + 3, c1);
646 //
647 // VLTS[lane] contains a map:
648 // "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
649 // "inner loop in the old loop nest" -> SCEV("c1").
650 std::vector<LoopToScevMapT> &VLTS;
651
652 // A map from the statement to a schedule where the innermost dimension is the
653 // dimension of the innermost loop containing the statement.
654 isl_map *Schedule;
655
656 VectorBlockGenerator(BlockGenerator &BlockGen,
657 std::vector<LoopToScevMapT> &VLTS,
658 __isl_keep isl_map *Schedule);
659
660 int getVectorWidth();
661
662 Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
663 VectorValueMapT &ScalarMaps, Loop *L);
664
665 Type *getVectorPtrTy(const Value *V, int Width);
666
667 /// Load a vector from a set of adjacent scalars
668 ///
669 /// In case a set of scalars is known to be next to each other in memory,
670 /// create a vector load that loads those scalars
671 ///
672 /// %vector_ptr= bitcast double* %p to <4 x double>*
673 /// %vec_full = load <4 x double>* %vector_ptr
674 ///
675 /// @param Stmt The statement to code generate.
676 /// @param NegativeStride This is used to indicate a -1 stride. In such
677 /// a case we load the end of a base address and
678 /// shuffle the accesses in reverse order into the
679 /// vector. By default we would do only positive
680 /// strides.
681 ///
682 /// @param NewAccesses A map from memory access ids to new ast
683 /// expressions, which may contain new access
684 /// expressions for certain memory accesses.
685 Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
686 VectorValueMapT &ScalarMaps,
687 __isl_keep isl_id_to_ast_expr *NewAccesses,
688 bool NegativeStride);
689
690 /// Load a vector initialized from a single scalar in memory
691 ///
692 /// In case all elements of a vector are initialized to the same
693 /// scalar value, this value is loaded and shuffled into all elements
694 /// of the vector.
695 ///
696 /// %splat_one = load <1 x double>* %p
697 /// %splat = shufflevector <1 x double> %splat_one, <1 x
698 /// double> %splat_one, <4 x i32> zeroinitializer
699 ///
700 /// @param NewAccesses A map from memory access ids to new ast expressions,
701 /// which may contain new access expressions for certain
702 /// memory accesses.
703 Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
704 ValueMapT &BBMap,
705 __isl_keep isl_id_to_ast_expr *NewAccesses);
706
707 /// Load a vector from scalars distributed in memory
708 ///
709 /// In case some scalars a distributed randomly in memory. Create a vector
710 /// by loading each scalar and by inserting one after the other into the
711 /// vector.
712 ///
713 /// %scalar_1= load double* %p_1
714 /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
715 /// %scalar 2 = load double* %p_2
716 /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1
717 ///
718 /// @param NewAccesses A map from memory access ids to new ast expressions,
719 /// which may contain new access expressions for certain
720 /// memory accesses.
721 Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
722 VectorValueMapT &ScalarMaps,
723 __isl_keep isl_id_to_ast_expr *NewAccesses);
724
725 /// @param NewAccesses A map from memory access ids to new ast expressions,
726 /// which may contain new access expressions for certain
727 /// memory accesses.
728 void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
729 VectorValueMapT &ScalarMaps,
730 __isl_keep isl_id_to_ast_expr *NewAccesses);
731
732 void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
733 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
734
735 void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
736 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
737
738 /// @param NewAccesses A map from memory access ids to new ast expressions,
739 /// which may contain new access expressions for certain
740 /// memory accesses.
741 void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
742 VectorValueMapT &ScalarMaps,
743 __isl_keep isl_id_to_ast_expr *NewAccesses);
744
745 /// @param NewAccesses A map from memory access ids to new ast expressions,
746 /// which may contain new access expressions for certain
747 /// memory accesses.
748 void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
749 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
750 __isl_keep isl_id_to_ast_expr *NewAccesses);
751
752 bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
753 VectorValueMapT &ScalarMaps);
754
755 bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
756
757 /// Generate vector loads for scalars.
758 ///
759 /// @param Stmt The scop statement for which to generate the loads.
760 /// @param VectorBlockMap A map that will be updated to relate the original
761 /// values with the newly generated vector loads.
762 void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
763
764 /// Verify absence of scalar stores.
765 ///
766 /// @param Stmt The scop statement to check for scalar stores.
767 void verifyNoScalarStores(ScopStmt &Stmt);
768
769 /// @param NewAccesses A map from memory access ids to new ast expressions,
770 /// which may contain new access expressions for certain
771 /// memory accesses.
772 void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
773 VectorValueMapT &ScalarMaps,
774 __isl_keep isl_id_to_ast_expr *NewAccesses);
775
776 /// @param NewAccesses A map from memory access ids to new ast expressions,
777 /// which may contain new access expressions for certain
778 /// memory accesses.
779 void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
780};
781
782/// Generator for new versions of polyhedral region statements.
783class RegionGenerator : public BlockGenerator {
784public:
785 /// Create a generator for regions.
786 ///
787 /// @param BlockGen A generator for basic blocks.
788 RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {}
789
790 virtual ~RegionGenerator() {}
791
792 /// Copy the region statement @p Stmt.
793 ///
794 /// This copies the entire region represented by @p Stmt and updates
795 /// references to old values with references to new values, as defined by
796 /// GlobalMap.
797 ///
798 /// @param Stmt The statement to code generate.
799 /// @param LTS A map from old loops to new induction variables as SCEVs.
800 void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
801 __isl_keep isl_id_to_ast_expr *IdToAstExp);
802
803private:
804 /// A map from old to the first new block in the region, that was created to
805 /// model the old basic block.
806 DenseMap<BasicBlock *, BasicBlock *> StartBlockMap;
807
808 /// A map from old to the last new block in the region, that was created to
809 /// model the old basic block.
810 DenseMap<BasicBlock *, BasicBlock *> EndBlockMap;
811
812 /// The "BBMaps" for the whole region (one for each block). In case a basic
813 /// block is code generated to multiple basic blocks (e.g., for partial
814 /// writes), the StartBasic is used as index for the RegionMap.
815 DenseMap<BasicBlock *, ValueMapT> RegionMaps;
816
817 /// Mapping to remember PHI nodes that still need incoming values.
818 using PHINodePairTy = std::pair<PHINode *, PHINode *>;
819 DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;
820
821 /// Repair the dominance tree after we created a copy block for @p BB.
822 ///
823 /// @returns The immediate dominator in the DT for @p BBCopy if in the region.
824 BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);
825
826 /// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
827 ///
828 /// PHI nodes, which may have (multiple) edges that enter from outside the
829 /// non-affine subregion and even from outside the scop, are code generated as
830 /// follows:
831 ///
832 /// # Original
833 ///
834 /// Region: %A-> %exit
835 /// NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
836 ///
837 /// pre:
838 /// %val = add i64 1, 1
839 ///
840 /// A:
841 /// br label %nonaff
842 ///
843 /// nonaffB:
844 /// %phi = phi i64 [%val, %A], [%valC, %nonAffC], [%valD, %D]
845 /// %cmp = <nonaff>
846 /// br i1 %cmp, label %C, label %nonaffC
847 ///
848 /// nonaffC:
849 /// %valC = add i64 1, 1
850 /// br i1 undef, label %D, label %nonaffB
851 ///
852 /// D:
853 /// %valD = ...
854 /// %exit_cond = <loopexit>
855 /// br i1 %exit_cond, label %nonaffB, label %exit
856 ///
857 /// exit:
858 /// ...
859 ///
860 /// - %start and %C enter from outside the non-affine region.
861 /// - %nonaffC enters from within the non-affine region.
862 ///
863 /// # New
864 ///
865 /// polly.A:
866 /// store i64 %val, i64* %phi.phiops
867 /// br label %polly.nonaffA.entry
868 ///
869 /// polly.nonaffB.entry:
870 /// %phi.phiops.reload = load i64, i64* %phi.phiops
871 /// br label %nonaffB
872 ///
873 /// polly.nonaffB:
874 /// %polly.phi = [%phi.phiops.reload, %nonaffB.entry],
875 /// [%p.valC, %polly.nonaffC]
876 ///
877 /// polly.nonaffC:
878 /// %p.valC = add i64 1, 1
879 /// br i1 undef, label %polly.D, label %polly.nonaffB
880 ///
881 /// polly.D:
882 /// %p.valD = ...
883 /// store i64 %p.valD, i64* %phi.phiops
884 /// %p.exit_cond = <loopexit>
885 /// br i1 %p.exit_cond, label %polly.nonaffB, label %exit
886 ///
887 /// Values that enter the PHI from outside the non-affine region are stored
888 /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
889 /// reloaded in %polly.nonaffB.entry, a basic block generated before the
890 /// actual non-affine region.
891 ///
892 /// When generating the PHI node of the non-affine region in %polly.nonaffB,
893 /// incoming edges from outside the region are combined into a single branch
894 /// from %polly.nonaffB.entry which has as incoming value the value reloaded
895 /// from the %phi.phiops stack slot. Incoming edges from within the region
896 /// refer to the copied instructions (%p.valC) and basic blocks
897 /// (%polly.nonaffC) of the non-affine region.
898 ///
899 /// @param Stmt The statement to code generate.
900 /// @param PHI The original PHI we copy.
901 /// @param PHICopy The copy of @p PHI.
902 /// @param IncomingBB An incoming block of @p PHI.
903 /// @param LTS A map from old loops to new induction variables as
904 /// SCEVs.
905 void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy,
906 BasicBlock *IncomingBB, LoopToScevMapT &LTS);
907
908 /// Create a PHI that combines the incoming values from all incoming blocks
909 /// that are in the subregion.
910 ///
911 /// PHIs in the subregion's exit block can have incoming edges from within and
912 /// outside the subregion. This function combines the incoming values from
913 /// within the subregion to appear as if there is only one incoming edge from
914 /// the subregion (an additional exit block is created by RegionGenerator).
915 /// This is to avoid that a value is written to the .phiops location without
916 /// leaving the subregion because the exiting block as an edge back into the
917 /// subregion.
918 ///
919 /// @param MA The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in
920 /// the subregion's exit block.
921 /// @param LTS Virtual induction variable mapping.
922 /// @param BBMap A mapping from old values to their new values in this block.
923 /// @param L Loop surrounding this region statement.
924 ///
925 /// @returns The constructed PHI node.
926 PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap,
927 Loop *L);
928
929 /// @param Return the new value of a scalar write, creating a PHINode if
930 /// necessary.
931 ///
932 /// @param MA A scalar WRITE MemoryAccess.
933 /// @param LTS Virtual induction variable mapping.
934 /// @param BBMap A mapping from old values to their new values in this block.
935 ///
936 /// @returns The effective value of @p MA's written value when leaving the
937 /// subregion.
938 /// @see buildExitPHI
939 Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap);
940
941 /// Generate the scalar stores for the given statement.
942 ///
943 /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
944 /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
945 /// be demoted to memory.
946 ///
947 /// @param Stmt The statement we generate code for.
948 /// @param LTS A mapping from loops virtual canonical induction variable to
949 /// their new values (for values recalculated in the new ScoP,
950 /// but not within this basic block)
951 /// @param BBMap A mapping from old values to their new values in this block.
952 /// @param LTS A mapping from loops virtual canonical induction variable to
953 /// their new values.
954 virtual void
955 generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMAp,
956 __isl_keep isl_id_to_ast_expr *NewAccesses) override;
957
958 /// Copy a single PHI instruction.
959 ///
960 /// This copies a single PHI instruction and updates references to old values
961 /// with references to new values, as defined by GlobalMap and BBMap.
962 ///
963 /// @param Stmt The statement to code generate.
964 /// @param PHI The PHI instruction to copy.
965 /// @param BBMap A mapping from old values to their new values
966 /// (for values recalculated within this basic block).
967 /// @param LTS A map from old loops to new induction variables as SCEVs.
968 virtual void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst,
969 ValueMapT &BBMap,
970 LoopToScevMapT &LTS) override;
971};
972} // namespace polly
973#endif