//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares helper functions for running LLVM in a multi-threaded
// environment.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_THREADING_H
#define LLVM_SUPPORT_THREADING_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/Support/Compiler.h"
#include <ciso646> // So we can check the C++ standard lib macros.
#include <cstdint>
#include <functional>

#if defined(_MSC_VER)
// MSVC's call_once implementation worked since VS 2015, which is the minimum
// supported version as of this writing.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(_LIBCPP_VERSION) ||                                               \
     !(defined(__NetBSD__) || defined(__OpenBSD__) ||                          \
       (defined(__ppc__) || defined(__PPC__))))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#else
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#endif

#if LLVM_THREADING_USE_STD_CALL_ONCE
#include <mutex>
#else
#include "llvm/Support/Atomic.h"
#endif

namespace llvm {
class Twine;

/// Reports whether this build of LLVM supports multi-threading.
///
/// \returns true if LLVM is compiled with support for multi-threading, and
/// false otherwise.
bool llvm_is_multithreaded();

Olivier Deprezf4ef2d02021-04-20 13:36:24 +020057/// Execute the given \p UserFn on a separate thread, passing it the provided \p
58/// UserData and waits for thread completion.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010059///
60/// This function does not guarantee that the code will actually be executed
61/// on a separate thread or honoring the requested stack size, but tries to do
62/// so where system support is available.
63///
64/// \param UserFn - The callback to execute.
65/// \param UserData - An argument to pass to the callback function.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020066/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
67/// (or None for default)
68void llvm_execute_on_thread(
69 void (*UserFn)(void *), void *UserData,
70 llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
71
72/// Schedule the given \p Func for execution on a separate thread, then return
73/// to the caller immediately. Roughly equivalent to
74/// `std::thread(Func).detach()`, except it allows requesting a specific stack
75/// size, if supported for the platform.
76///
77/// This function would report a fatal error if it can't execute the code
78/// on a separate thread.
79///
80/// \param Func - The callback to execute.
81/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
82/// (or None for default)
83void llvm_execute_on_thread_async(
84 llvm::unique_function<void()> Func,
85 llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010086
87#if LLVM_THREADING_USE_STD_CALL_ONCE
88
89 typedef std::once_flag once_flag;
90
91#else
92
93 enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
94
Andrew Scullcdfcccc2018-10-05 20:58:37 +010095 /// The llvm::once_flag structure
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010096 ///
97 /// This type is modeled after std::once_flag to use with llvm::call_once.
98 /// This structure must be used as an opaque object. It is a struct to force
99 /// autoinitialization and behave like std::once_flag.
100 struct once_flag {
101 volatile sys::cas_flag status = Uninitialized;
102 };
103
104#endif
105
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100106 /// Execute the function specified as a parameter once.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100107 ///
108 /// Typical usage:
109 /// \code
110 /// void foo() {...};
111 /// ...
112 /// static once_flag flag;
113 /// call_once(flag, foo);
114 /// \endcode
115 ///
116 /// \param flag Flag used for tracking whether or not this has run.
117 /// \param F Function to call once.
118 template <typename Function, typename... Args>
119 void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
120#if LLVM_THREADING_USE_STD_CALL_ONCE
121 std::call_once(flag, std::forward<Function>(F),
122 std::forward<Args>(ArgList)...);
123#else
124 // For other platforms we use a generic (if brittle) version based on our
125 // atomics.
126 sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
127 if (old_val == Uninitialized) {
128 std::forward<Function>(F)(std::forward<Args>(ArgList)...);
129 sys::MemoryFence();
130 TsanIgnoreWritesBegin();
131 TsanHappensBefore(&flag.status);
132 flag.status = Done;
133 TsanIgnoreWritesEnd();
134 } else {
135 // Wait until any thread doing the call has finished.
136 sys::cas_flag tmp = flag.status;
137 sys::MemoryFence();
138 while (tmp != Done) {
139 tmp = flag.status;
140 sys::MemoryFence();
141 }
142 }
143 TsanHappensAfter(&flag.status);
144#endif
145 }
146
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200147 /// This tells how a thread pool will be used
148 class ThreadPoolStrategy {
149 public:
150 // The default value (0) means all available threads should be used,
151 // taking the affinity mask into account. If set, this value only represents
152 // a suggested high bound, the runtime might choose a lower value (not
153 // higher).
154 unsigned ThreadsRequested = 0;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100155
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200156 // If SMT is active, use hyper threads. If false, there will be only one
157 // std::thread per core.
158 bool UseHyperThreads = true;
159
160 // If set, will constrain 'ThreadsRequested' to the number of hardware
161 // threads, or hardware cores.
162 bool Limit = false;
163
164 /// Retrieves the max available threads for the current strategy. This
165 /// accounts for affinity masks and takes advantage of all CPU sockets.
166 unsigned compute_thread_count() const;
167
168 /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
169 /// multi-socket system, this ensures threads are assigned to all CPU
170 /// sockets. \p ThreadPoolNum represents a number bounded by [0,
171 /// compute_thread_count()).
172 void apply_thread_strategy(unsigned ThreadPoolNum) const;
173
174 /// Finds the CPU socket where a thread should go. Returns 'None' if the
175 /// thread shall remain on the actual CPU socket.
176 Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
177 };
178
179 /// Build a strategy from a number of threads as a string provided in \p Num.
180 /// When Num is above the max number of threads specified by the \p Default
181 /// strategy, we attempt to equally allocate the threads on all CPU sockets.
182 /// "0" or an empty string will return the \p Default strategy.
183 /// "all" for using all hardware threads.
184 Optional<ThreadPoolStrategy>
185 get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});
186
187 /// Returns a thread strategy for tasks requiring significant memory or other
188 /// resources. To be used for workloads where hardware_concurrency() proves to
189 /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
190 /// based on physical cores, if available for the host system, otherwise falls
191 /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
192 /// LLVM_ENABLE_THREADS = OFF.
193 inline ThreadPoolStrategy
194 heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
195 ThreadPoolStrategy S;
196 S.UseHyperThreads = false;
197 S.ThreadsRequested = ThreadCount;
198 return S;
199 }
200
201 /// Like heavyweight_hardware_concurrency() above, but builds a strategy
202 /// based on the rules described for get_threadpool_strategy().
203 /// If \p Num is invalid, returns a default strategy where one thread per
204 /// hardware core is used.
205 inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
206 Optional<ThreadPoolStrategy> S =
207 get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
208 if (S)
209 return *S;
210 return heavyweight_hardware_concurrency();
211 }
212
213 /// Returns a default thread strategy where all available hardware resources
214 /// are to be used, except for those initially excluded by an affinity mask.
215 /// This function takes affinity into consideration. Returns 1 when LLVM is
216 /// configured with LLVM_ENABLE_THREADS=OFF.
217 inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
218 ThreadPoolStrategy S;
219 S.ThreadsRequested = ThreadCount;
220 return S;
221 }
222
223 /// Returns an optimal thread strategy to execute specified amount of tasks.
224 /// This strategy should prevent us from creating too many threads if we
225 /// occasionaly have an unexpectedly small amount of tasks.
226 inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
227 ThreadPoolStrategy S;
228 S.Limit = true;
229 S.ThreadsRequested = TaskCount;
230 return S;
231 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100232
/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
/// this.
///
/// \returns The id of the calling thread.
uint64_t get_threadid();

/// Get the maximum length of a thread name on this platform.
/// A value of 0 means there is no limit.
///
/// \returns The maximum thread name length, or 0 if there is no limit.
uint32_t get_max_thread_name_length();

Andrew Scullcdfcccc2018-10-05 20:58:37 +0100243 /// Set the name of the current thread. Setting a thread's name can
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100244 /// be helpful for enabling useful diagnostics under a debugger or when
245 /// logging. The level of support for setting a thread's name varies
246 /// wildly across operating systems, and we only make a best effort to
247 /// perform the operation on supported platforms. No indication of success
248 /// or failure is returned.
249 void set_thread_name(const Twine &Name);
250
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100251 /// Get the name of the current thread. The level of support for
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100252 /// getting a thread's name varies wildly across operating systems, and it
253 /// is not even guaranteed that if you can successfully set a thread's name
254 /// that you can later get it back. This function is intended for diagnostic
255 /// purposes, and as with setting a thread's name no indication of whether
256 /// the operation succeeded or failed is returned.
257 void get_thread_name(SmallVectorImpl<char> &Name);
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100258
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200259 /// Returns a mask that represents on which hardware thread, core, CPU, NUMA
260 /// group, the calling thread can be executed. On Windows, threads cannot
261 /// cross CPU sockets boundaries.
262 llvm::BitVector get_thread_affinity_mask();
263
/// Returns how many physical CPUs or NUMA groups the system has.
///
/// \returns The number of physical CPUs or NUMA groups.
unsigned get_cpus();

/// Scheduling priorities that can be requested through set_thread_priority().
enum class ThreadPriority {
  Background = 0,
  Default = 1,
};

/// Result reported by set_thread_priority().
enum class SetThreadPriorityResult { FAILURE, SUCCESS };

/// If priority is Background tries to lower current threads priority such
/// that it does not affect foreground tasks significantly. Can be used for
/// long-running, latency-insensitive tasks to make sure cpu is not hogged by
/// this task.
/// If the priority is default tries to restore current threads priority to
/// default scheduling priority.
///
/// \param Priority The priority to request for the current thread.
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
} // namespace llvm

#endif // LLVM_SUPPORT_THREADING_H