Update prebuilt Clang to r416183b from Android.

https://android.googlesource.com/platform/prebuilts/clang/host/
linux-x86/+/06a71ddac05c22edb2d10b590e1769b3f8619bef

clang 12.0.5 (based on r416183b) from build 7284624.

Change-Id: I277a316abcf47307562d8b748b84870f31a72866
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/linux-x64/clang/include/llvm/Support/Threading.h b/linux-x64/clang/include/llvm/Support/Threading.h
index 46d413d..46cf825 100644
--- a/linux-x64/clang/include/llvm/Support/Threading.h
+++ b/linux-x64/clang/include/llvm/Support/Threading.h
@@ -14,6 +14,8 @@
 #ifndef LLVM_SUPPORT_THREADING_H
 #define LLVM_SUPPORT_THREADING_H
 
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/Compiler.h"
@@ -52,9 +54,8 @@
 /// false otherwise.
 bool llvm_is_multithreaded();
 
-/// llvm_execute_on_thread - Execute the given \p UserFn on a separate
-/// thread, passing it the provided \p UserData and waits for thread
-/// completion.
+/// Execute the given \p UserFn on a separate thread, passing it the provided \p
+/// UserData, and wait for thread completion.
 ///
 /// This function does not guarantee that the code will actually be executed
 /// on a separate thread or honoring the requested stack size, but tries to do
@@ -62,10 +63,26 @@
 ///
 /// \param UserFn - The callback to execute.
 /// \param UserData - An argument to pass to the callback function.
-/// \param RequestedStackSize - If non-zero, a requested size (in bytes) for
-/// the thread stack.
-void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData,
-                            unsigned RequestedStackSize = 0);
+/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
+/// (or None for default)
+void llvm_execute_on_thread(
+    void (*UserFn)(void *), void *UserData,
+    llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
+
+/// Schedule the given \p Func for execution on a separate thread, then return
+/// to the caller immediately. Roughly equivalent to
+/// `std::thread(Func).detach()`, except it allows requesting a specific stack
+/// size, if supported for the platform.
+///
+/// This function reports a fatal error if it can't execute the code
+/// on a separate thread.
+///
+/// \param Func - The callback to execute.
+/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
+/// (or None for default)
+void llvm_execute_on_thread_async(
+    llvm::unique_function<void()> Func,
+    llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
 
 #if LLVM_THREADING_USE_STD_CALL_ONCE
 
@@ -127,20 +144,91 @@
 #endif
   }
 
-  /// Get the amount of currency to use for tasks requiring significant
-  /// memory or other resources. Currently based on physical cores, if
-  /// available for the host system, otherwise falls back to
-  /// thread::hardware_concurrency().
-  /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF
-  unsigned heavyweight_hardware_concurrency();
+  /// Describes how a thread pool will be used.
+  class ThreadPoolStrategy {
+  public:
+    // The default value (0) means all available threads should be used,
+    // taking the affinity mask into account. If set, this value only represents
+    // a suggested high bound, the runtime might choose a lower value (not
+    // higher).
+    unsigned ThreadsRequested = 0;
 
-  /// Get the number of threads that the current program can execute
-  /// concurrently. On some systems std::thread::hardware_concurrency() returns
-  /// the total number of cores, without taking affinity into consideration.
-  /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF.
-  /// Fallback to std::thread::hardware_concurrency() if sched_getaffinity is
-  /// not available.
-  unsigned hardware_concurrency();
+    // If SMT is active, use hyper threads. If false, there will be only one
+    // std::thread per core.
+    bool UseHyperThreads = true;
+
+    // If set, will constrain 'ThreadsRequested' to the number of hardware
+    // threads, or hardware cores.
+    bool Limit = false;
+
+    /// Retrieves the max available threads for the current strategy. This
+    /// accounts for affinity masks and takes advantage of all CPU sockets.
+    unsigned compute_thread_count() const;
+
+    /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
+    /// multi-socket system, this ensures threads are assigned to all CPU
+    /// sockets. \p ThreadPoolNum represents a number bounded by [0,
+    /// compute_thread_count()).
+    void apply_thread_strategy(unsigned ThreadPoolNum) const;
+
+    /// Finds the CPU socket where a thread should go. Returns 'None' if the
+    /// thread shall remain on its current CPU socket.
+    Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
+  };
+
+  /// Build a strategy from a number of threads as a string provided in \p Num.
+  /// When Num is above the max number of threads specified by the \p Default
+  /// strategy, we attempt to equally allocate the threads on all CPU sockets.
+  /// "0" or an empty string will return the \p Default strategy.
+  /// "all" selects all hardware threads.
+  Optional<ThreadPoolStrategy>
+  get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});
+
+  /// Returns a thread strategy for tasks requiring significant memory or other
+  /// resources. To be used for workloads where hardware_concurrency() proves to
+  /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
+  /// based on physical cores, if available for the host system, otherwise falls
+  /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
+  /// LLVM_ENABLE_THREADS=OFF.
+  inline ThreadPoolStrategy
+  heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
+    ThreadPoolStrategy S;
+    S.UseHyperThreads = false;
+    S.ThreadsRequested = ThreadCount;
+    return S;
+  }
+
+  /// Like heavyweight_hardware_concurrency() above, but builds a strategy
+  /// based on the rules described for get_threadpool_strategy().
+  /// If \p Num is invalid, returns a default strategy where one thread per
+  /// hardware core is used.
+  inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
+    Optional<ThreadPoolStrategy> S =
+        get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
+    if (S)
+      return *S;
+    return heavyweight_hardware_concurrency();
+  }
+
+  /// Returns a default thread strategy where all available hardware resources
+  /// are to be used, except for those initially excluded by an affinity mask.
+  /// This function takes affinity into consideration. Returns 1 when LLVM is
+  /// configured with LLVM_ENABLE_THREADS=OFF.
+  inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
+    ThreadPoolStrategy S;
+    S.ThreadsRequested = ThreadCount;
+    return S;
+  }
+
+  /// Returns an optimal thread strategy to execute specified amount of tasks.
+  /// This strategy should prevent us from creating too many threads if we
+  /// occasionally have an unexpectedly small number of tasks.
+  inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
+    ThreadPoolStrategy S;
+    S.Limit = true;
+    S.ThreadsRequested = TaskCount;
+    return S;
+  }
 
   /// Return the current thread id, as used in various OS system calls.
   /// Note that not all platforms guarantee that the value returned will be
@@ -168,6 +256,14 @@
   /// the operation succeeded or failed is returned.
   void get_thread_name(SmallVectorImpl<char> &Name);
 
+  /// Returns a mask representing the hardware threads, cores, CPUs, or NUMA
+  /// groups on which the calling thread can be executed. On Windows, threads
+  /// cannot cross CPU socket boundaries.
+  llvm::BitVector get_thread_affinity_mask();
+
+  /// Returns how many physical CPUs or NUMA groups the system has.
+  unsigned get_cpus();
+
   enum class ThreadPriority {
     Background = 0,
     Default = 1,