//===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains functions to create scalar and OpenMP parallel loops
// as LLVM-IR.
//
//===----------------------------------------------------------------------===//
| 13 | #ifndef POLLY_LOOP_GENERATORS_KMP_H |
| 14 | #define POLLY_LOOP_GENERATORS_KMP_H |
| 15 | |
| 16 | #include "polly/CodeGen/IRBuilder.h" |
| 17 | #include "polly/CodeGen/LoopGenerators.h" |
| 18 | #include "polly/Support/ScopHelper.h" |
| 19 | #include "llvm/ADT/SetVector.h" |
| 20 | |
| 21 | namespace polly { |
| 22 | using namespace llvm; |
| 23 | |
| 24 | /// This ParallelLoopGenerator subclass handles the generation of parallelized |
| 25 | /// code, utilizing the LLVM OpenMP library. |
| 26 | class ParallelLoopGeneratorKMP : public ParallelLoopGenerator { |
| 27 | public: |
| 28 | /// Create a parallel loop generator for the current function. |
| 29 | ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI, |
| 30 | DominatorTree &DT, const DataLayout &DL) |
| 31 | : ParallelLoopGenerator(Builder, LI, DT, DL) { |
| 32 | SourceLocationInfo = createSourceLocation(); |
| 33 | } |
| 34 | |
| 35 | protected: |
| 36 | /// The source location struct of this loop. |
| 37 | /// ident_t = type { i32, i32, i32, i32, i8* } |
| 38 | GlobalValue *SourceLocationInfo; |
| 39 | |
| 40 | /// Convert the combination of given chunk size and scheduling type (which |
| 41 | /// might have been set via the command line) into the corresponding |
| 42 | /// scheduling type. This may result (e.g.) in a 'change' from |
| 43 | /// "static chunked" scheduling to "static non-chunked" (regarding the |
| 44 | /// provided and returned scheduling types). |
| 45 | /// |
| 46 | /// @param ChunkSize The chunk size, set via command line or its default. |
| 47 | /// @param Scheduling The scheduling, set via command line or its default. |
| 48 | /// |
| 49 | /// @return The corresponding OMPGeneralSchedulingType. |
| 50 | OMPGeneralSchedulingType |
| 51 | getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const; |
| 52 | |
| 53 | /// Returns True if 'LongType' is 64bit wide, otherwise: False. |
| 54 | bool is64BitArch(); |
| 55 | |
| 56 | public: |
| 57 | // The functions below may be used if one does not want to generate a |
| 58 | // specific OpenMP parallel loop, but generate individual parts of it |
| 59 | // (e.g. the subfunction definition). |
| 60 | |
| 61 | /// Create a runtime library call to spawn the worker threads. |
| 62 | /// |
| 63 | /// @param SubFn The subfunction which holds the loop body. |
| 64 | /// @param SubFnParam The parameter for the subfunction (basically the struct |
| 65 | /// filled with the outside values). |
| 66 | /// @param LB The lower bound for the loop we parallelize. |
| 67 | /// @param UB The upper bound for the loop we parallelize. |
| 68 | /// @param Stride The stride of the loop we parallelize. |
| 69 | void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB, |
| 70 | Value *UB, Value *Stride); |
| 71 | |
| 72 | void deployParallelExecution(Function *SubFn, Value *SubFnParam, Value *LB, |
| 73 | Value *UB, Value *Stride) override; |
| 74 | |
| 75 | virtual Function *prepareSubFnDefinition(Function *F) const override; |
| 76 | |
| 77 | std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct, |
| 78 | SetVector<Value *> UsedValues, |
| 79 | ValueMapT &VMap) override; |
| 80 | |
| 81 | /// Create a runtime library call to get the current global thread number. |
| 82 | /// |
| 83 | /// @return A Value ref which holds the current global thread number. |
| 84 | Value *createCallGlobalThreadNum(); |
| 85 | |
| 86 | /// Create a runtime library call to request a number of threads. |
| 87 | /// Which will be used in the next OpenMP section (by the next fork). |
| 88 | /// |
| 89 | /// @param GlobalThreadID The global thread ID. |
| 90 | /// @param NumThreads The number of threads to use. |
| 91 | void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads); |
| 92 | |
| 93 | /// Create a runtime library call to prepare the OpenMP runtime. |
| 94 | /// For dynamically scheduled loops, saving the loop arguments. |
| 95 | /// |
| 96 | /// @param GlobalThreadID The global thread ID. |
| 97 | /// @param LB The loop's lower bound. |
| 98 | /// @param UB The loop's upper bound. |
| 99 | /// @param Inc The loop increment. |
| 100 | /// @param ChunkSize The chunk size of the parallel loop. |
| 101 | void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB, |
| 102 | Value *Inc, Value *ChunkSize); |
| 103 | |
| 104 | /// Create a runtime library call to retrieve the next (dynamically) |
| 105 | /// allocated chunk of work for this thread. |
| 106 | /// |
| 107 | /// @param GlobalThreadID The global thread ID. |
| 108 | /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is |
| 109 | /// the last chunk of work, or 0 otherwise. |
| 110 | /// @param LBPtr Pointer to the lower bound for the next chunk. |
| 111 | /// @param UBPtr Pointer to the upper bound for the next chunk. |
| 112 | /// @param StridePtr Pointer to the stride for the next chunk. |
| 113 | /// |
| 114 | /// @return A Value which holds 1 if there is work to be done, 0 otherwise. |
| 115 | Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr, |
| 116 | Value *LBPtr, Value *UBPtr, Value *StridePtr); |
| 117 | |
| 118 | /// Create a runtime library call to prepare the OpenMP runtime. |
| 119 | /// For statically scheduled loops, saving the loop arguments. |
| 120 | /// |
| 121 | /// @param GlobalThreadID The global thread ID. |
| 122 | /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is |
| 123 | /// the last chunk of work, or 0 otherwise. |
| 124 | /// @param LBPtr Pointer to the lower bound for the next chunk. |
| 125 | /// @param UBPtr Pointer to the upper bound for the next chunk. |
| 126 | /// @param StridePtr Pointer to the stride for the next chunk. |
| 127 | /// @param ChunkSize The chunk size of the parallel loop. |
| 128 | void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr, |
| 129 | Value *LBPtr, Value *UBPtr, Value *StridePtr, |
| 130 | Value *ChunkSize); |
| 131 | |
| 132 | /// Create a runtime library call to mark the end of |
| 133 | /// a statically scheduled loop. |
| 134 | /// |
| 135 | /// @param GlobalThreadID The global thread ID. |
| 136 | void createCallStaticFini(Value *GlobalThreadID); |
| 137 | |
| 138 | /// Create the current source location. |
| 139 | /// |
| 140 | /// TODO: Generates only(!) dummy values. |
| 141 | GlobalVariable *createSourceLocation(); |
| 142 | }; |
| 143 | } // end namespace polly |
| 144 | #endif |