Blame - linux-x64/clang/include/llvm/Frontend/OpenMP/OMPGridValues.h - hafnium/prebuilts

blob: 6b48cc447e131bbbd72b335ca82a307e5325c533 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	//====--- OMPGridValues.h - Language-specific address spaces --- C++ --====//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8	///
				9	/// \file
				10	/// \brief Provides definitions for Target specific Grid Values
				11	///
				12	//===----------------------------------------------------------------------===//
				13
				14	#ifndef LLVM_OPENMP_GRIDVALUES_H
				15	#define LLVM_OPENMP_GRIDVALUES_H
				16
				17	namespace llvm {
				18
				19	namespace omp {
				20
				21	/// \brief Defines various target-specific GPU grid values that must be
				22	/// consistent between host RTL (plugin), device RTL, and clang.
				23	/// We can change grid values for a "fat" binary so that different
				24	/// passes get the correct values when generating code for a
				25	/// multi-target binary. Both amdgcn and nvptx values are stored in
				26	/// this file. In the future, should there be differences between GPUs
				27	/// of the same architecture, then simply make a different array and
				28	/// use the new array name.
				29	///
				30	/// Example usage in clang:
				31	/// const unsigned slot_size =
				32	/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
				33	///
				34	/// Example usage in libomptarget/deviceRTLs:
				35	/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
				36	/// #ifdef __AMDGPU__
				37	/// #define GRIDVAL AMDGPUGpuGridValues
				38	/// #else
				39	/// #define GRIDVAL NVPTXGpuGridValues
				40	/// #endif
				41	/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
				42	/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
				43	///
				44	/// Example usage in libomptarget hsa plugin:
				45	/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
				46	/// #define GRIDVAL AMDGPUGpuGridValues
				47	/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
				48	/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
				49	///
				50	/// Example usage in libomptarget cuda plugin:
				51	/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
				52	/// #define GRIDVAL NVPTXGpuGridValues
				53	/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
				54	/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
				55	///
				56	enum GVIDX {
				57	/// The maximum number of workers in a kernel.
				58	/// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
				59	GV_Threads,
				60	/// The size reserved for data in a shared memory slot.
				61	GV_Slot_Size,
				62	/// The default value of maximum number of threads in a worker warp.
				63	GV_Warp_Size,
				64	/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
				65	/// for NVPTX.
				66	GV_Warp_Size_32,
				67	/// The number of bits required to represent the max number of threads in warp
				68	GV_Warp_Size_Log2,
				69	/// GV_Warp_Size * GV_Slot_Size,
				70	GV_Warp_Slot_Size,
				71	/// the maximum number of teams.
				72	GV_Max_Teams,
				73	/// Global Memory Alignment
				74	GV_Mem_Align,
				75	/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
				76	GV_Warp_Size_Log2_Mask,
				77	// An alternative to the heavy data sharing infrastructure that uses global
				78	// memory is one that uses device __shared__ memory. The amount of such space
				79	// (in bytes) reserved by the OpenMP runtime is noted here.
				80	GV_SimpleBufferSize,
				81	// The absolute maximum team size for a working group
				82	GV_Max_WG_Size,
				83	// The default maximum team size for a working group
				84	GV_Default_WG_Size,
				85	// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
				86	GV_Max_Warp_Number,
				87	/// The slot size that should be reserved for a working warp.
				88	/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
				89	GV_Warp_Size_Log2_MaskL
				90	};
				91
				92	/// For AMDGPU GPUs
				93	static constexpr unsigned AMDGPUGpuGridValues[] = {
				94	448, // GV_Threads
				95	256, // GV_Slot_Size
				96	64, // GV_Warp_Size
				97	32, // GV_Warp_Size_32
				98	6, // GV_Warp_Size_Log2
				99	64 * 256, // GV_Warp_Slot_Size
				100	128, // GV_Max_Teams
				101	256, // GV_Mem_Align
				102	63, // GV_Warp_Size_Log2_Mask
				103	896, // GV_SimpleBufferSize
				104	1024, // GV_Max_WG_Size,
				105	256, // GV_Defaut_WG_Size
				106	1024 / 64, // GV_Max_WG_Size / GV_WarpSize
				107	63 // GV_Warp_Size_Log2_MaskL
				108	};
				109
				110	/// For Nvidia GPUs
				111	static constexpr unsigned NVPTXGpuGridValues[] = {
				112	992, // GV_Threads
				113	256, // GV_Slot_Size
				114	32, // GV_Warp_Size
				115	32, // GV_Warp_Size_32
				116	5, // GV_Warp_Size_Log2
				117	32 * 256, // GV_Warp_Slot_Size
				118	1024, // GV_Max_Teams
				119	256, // GV_Mem_Align
				120	(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
				121	896, // GV_SimpleBufferSize
				122	1024, // GV_Max_WG_Size
				123	128, // GV_Defaut_WG_Size
				124	1024 / 32, // GV_Max_WG_Size / GV_WarpSize
				125	31 // GV_Warp_Size_Log2_MaskL
				126	};
				127
				128	} // namespace omp
				129	} // namespace llvm
				130
				131	#endif // LLVM_OPENMP_GRIDVALUES_H