blob: 6b48cc447e131bbbd72b335ca82a307e5325c533 [file] [log] [blame]
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001//====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// \brief Provides definitions for Target specific Grid Values
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_OPENMP_GRIDVALUES_H
15#define LLVM_OPENMP_GRIDVALUES_H
16
17namespace llvm {
18
19namespace omp {
20
21/// \brief Defines various target-specific GPU grid values that must be
22/// consistent between host RTL (plugin), device RTL, and clang.
23/// We can change grid values for a "fat" binary so that different
24/// passes get the correct values when generating code for a
25/// multi-target binary. Both amdgcn and nvptx values are stored in
26/// this file. In the future, should there be differences between GPUs
27/// of the same architecture, then simply make a different array and
28/// use the new array name.
29///
30/// Example usage in clang:
31/// const unsigned slot_size =
32/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
33///
34/// Example usage in libomptarget/deviceRTLs:
35/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
36/// #ifdef __AMDGPU__
37/// #define GRIDVAL AMDGPUGpuGridValues
38/// #else
39/// #define GRIDVAL NVPTXGpuGridValues
40/// #endif
41/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
42/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
43///
44/// Example usage in libomptarget hsa plugin:
45/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
46/// #define GRIDVAL AMDGPUGpuGridValues
47/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
48/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
49///
50/// Example usage in libomptarget cuda plugin:
51/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
52/// #define GRIDVAL NVPTXGpuGridValues
53/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
54/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
55///
56enum GVIDX {
57 /// The maximum number of workers in a kernel.
58 /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
59 GV_Threads,
60 /// The size reserved for data in a shared memory slot.
61 GV_Slot_Size,
62 /// The default value of maximum number of threads in a worker warp.
63 GV_Warp_Size,
64 /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
65 /// for NVPTX.
66 GV_Warp_Size_32,
67 /// The number of bits required to represent the max number of threads in warp
68 GV_Warp_Size_Log2,
69 /// GV_Warp_Size * GV_Slot_Size,
70 GV_Warp_Slot_Size,
71 /// the maximum number of teams.
72 GV_Max_Teams,
73 /// Global Memory Alignment
74 GV_Mem_Align,
75 /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
76 GV_Warp_Size_Log2_Mask,
77 // An alternative to the heavy data sharing infrastructure that uses global
78 // memory is one that uses device __shared__ memory. The amount of such space
79 // (in bytes) reserved by the OpenMP runtime is noted here.
80 GV_SimpleBufferSize,
81 // The absolute maximum team size for a working group
82 GV_Max_WG_Size,
83 // The default maximum team size for a working group
84 GV_Default_WG_Size,
85 // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
86 GV_Max_Warp_Number,
87 /// The slot size that should be reserved for a working warp.
88 /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
89 GV_Warp_Size_Log2_MaskL
90};
91
92/// For AMDGPU GPUs
93static constexpr unsigned AMDGPUGpuGridValues[] = {
94 448, // GV_Threads
95 256, // GV_Slot_Size
96 64, // GV_Warp_Size
97 32, // GV_Warp_Size_32
98 6, // GV_Warp_Size_Log2
99 64 * 256, // GV_Warp_Slot_Size
100 128, // GV_Max_Teams
101 256, // GV_Mem_Align
102 63, // GV_Warp_Size_Log2_Mask
103 896, // GV_SimpleBufferSize
104 1024, // GV_Max_WG_Size,
105 256, // GV_Defaut_WG_Size
106 1024 / 64, // GV_Max_WG_Size / GV_WarpSize
107 63 // GV_Warp_Size_Log2_MaskL
108};
109
110/// For Nvidia GPUs
111static constexpr unsigned NVPTXGpuGridValues[] = {
112 992, // GV_Threads
113 256, // GV_Slot_Size
114 32, // GV_Warp_Size
115 32, // GV_Warp_Size_32
116 5, // GV_Warp_Size_Log2
117 32 * 256, // GV_Warp_Slot_Size
118 1024, // GV_Max_Teams
119 256, // GV_Mem_Align
120 (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
121 896, // GV_SimpleBufferSize
122 1024, // GV_Max_WG_Size
123 128, // GV_Defaut_WG_Size
124 1024 / 32, // GV_Max_WG_Size / GV_WarpSize
125 31 // GV_Warp_Size_Log2_MaskL
126};
127
128} // namespace omp
129} // namespace llvm
130
131#endif // LLVM_OPENMP_GRIDVALUES_H