Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
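
This pulls in the upstream perf bench numa fixes carried by the stable
series: node_has_cpus() and bind_to_node() now enumerate CPUs with
numa_node_to_cpus() (and free the cpumask) instead of assuming CPUs are
evenly divided across nodes, the bw test names are corrected, and the
startup handshake drops the startup_done_mutex trick - one task
unlocking a mutex that another task had locked, which POSIX leaves
undefined - in favour of process-shared condition variables.

For illustration only, the new handshake reduces to the standalone
sketch below. Names (worker, NR_TASKS) are placeholders, and the plain
static initializers stand in for the benchmark's init_global_mutex()/
init_global_cond(), which set PTHREAD_PROCESS_SHARED because workers
may be forked processes rather than threads:

  #include <pthread.h>
  #include <stdbool.h>

  #define NR_TASKS 4	/* placeholder for g->p.nr_tasks */

  static pthread_mutex_t startup_mutex = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t  startup_cond  = PTHREAD_COND_INITIALIZER;
  static int nr_tasks_started;

  static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t  start_work_cond  = PTHREAD_COND_INITIALIZER;
  static bool start_work;

  static void *worker(void *arg)
  {
  	/* Check in; the last worker to arrive wakes the main thread. */
  	pthread_mutex_lock(&startup_mutex);
  	if (++nr_tasks_started == NR_TASKS)
  		pthread_cond_signal(&startup_cond);
  	pthread_mutex_unlock(&startup_mutex);

  	/* Sleep until main flips start_work; no mutex-unlock trick. */
  	pthread_mutex_lock(&start_work_mutex);
  	while (!start_work)
  		pthread_cond_wait(&start_work_cond, &start_work_mutex);
  	pthread_mutex_unlock(&start_work_mutex);

  	/* ... timed benchmark work would run here ... */
  	return arg;
  }

  int main(void)
  {
  	pthread_t tids[NR_TASKS];
  	int i;

  	for (i = 0; i < NR_TASKS; i++)
  		pthread_create(&tids[i], NULL, worker, NULL);

  	/* Block on the condvar instead of polling nr_tasks_started. */
  	pthread_mutex_lock(&startup_mutex);
  	while (nr_tasks_started != NR_TASKS)
  		pthread_cond_wait(&startup_cond, &startup_mutex);
  	pthread_mutex_unlock(&startup_mutex);

  	/* Set the predicate under the lock, then release everyone. */
  	pthread_mutex_lock(&start_work_mutex);
  	start_work = true;
  	pthread_mutex_unlock(&start_work_mutex);
  	pthread_cond_broadcast(&start_work_cond);

  	for (i = 0; i < NR_TASKS; i++)
  		pthread_join(tids[i], NULL);
  	return 0;
  }

Because every waiter re-checks its predicate (start_work,
nr_tasks_started) under the mutex, a signal or broadcast that arrives
early cannot be lost, which is the race the old mutex handshake had.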
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5797253..11726ec 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -137,12 +137,13 @@
 	u8			*data;
 
 	pthread_mutex_t		startup_mutex;
+	pthread_cond_t		startup_cond;
 	int			nr_tasks_started;
 
-	pthread_mutex_t		startup_done_mutex;
-
 	pthread_mutex_t		start_work_mutex;
+	pthread_cond_t		start_work_cond;
 	int			nr_tasks_working;
+	bool			start_work;
 
 	pthread_mutex_t		stop_work_mutex;
 	u64			bytes_done;
@@ -247,17 +248,22 @@
  */
 static bool node_has_cpus(int node)
 {
-	struct bitmask *cpu = numa_allocate_cpumask();
-	unsigned int i;
+	struct bitmask *cpumask = numa_allocate_cpumask();
+	bool ret = false; /* fall back to nocpus */
+	int cpu;
 
-	if (cpu && !numa_node_to_cpus(node, cpu)) {
-		for (i = 0; i < cpu->size; i++) {
-			if (numa_bitmask_isbitset(cpu, i))
-				return true;
+	BUG_ON(!cpumask);
+	if (!numa_node_to_cpus(node, cpumask)) {
+		for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+			if (numa_bitmask_isbitset(cpumask, cpu)) {
+				ret = true;
+				break;
+			}
 		}
 	}
+	numa_free_cpumask(cpumask);
 
-	return false; /* lets fall back to nocpus safely */
+	return ret;
 }
 
 static cpu_set_t bind_to_cpu(int target_cpu)
@@ -288,14 +294,10 @@
 
 static cpu_set_t bind_to_node(int target_node)
 {
-	int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
 	cpu_set_t orig_mask, mask;
 	int cpu;
 	int ret;
 
-	BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
-	BUG_ON(!cpus_per_node);
-
 	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
 	BUG_ON(ret);
@@ -305,13 +307,16 @@
 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
 			CPU_SET(cpu, &mask);
 	} else {
-		int cpu_start = (target_node + 0) * cpus_per_node;
-		int cpu_stop  = (target_node + 1) * cpus_per_node;
+		struct bitmask *cpumask = numa_allocate_cpumask();
 
-		BUG_ON(cpu_stop > g->p.nr_cpus);
-
-		for (cpu = cpu_start; cpu < cpu_stop; cpu++)
-			CPU_SET(cpu, &mask);
+		BUG_ON(!cpumask);
+		if (!numa_node_to_cpus(target_node, cpumask)) {
+			for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+				if (numa_bitmask_isbitset(cpumask, cpu))
+					CPU_SET(cpu, &mask);
+			}
+		}
+		numa_free_cpumask(cpumask);
 	}
 
 	ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -479,6 +484,18 @@
 	pthread_mutex_init(mutex, &attr);
 }
 
+/*
+ * Return a process-shared (global) condition variable:
+ */
+static void init_global_cond(pthread_cond_t *cond)
+{
+	pthread_condattr_t attr;
+
+	pthread_condattr_init(&attr);
+	pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+	pthread_cond_init(cond, &attr);
+}
+
 static int parse_cpu_list(const char *arg)
 {
 	p0.cpu_list_str = strdup(arg);
@@ -729,8 +746,6 @@
 		return -1;
 
 	return parse_node_list(arg);
-
-	return 0;
 }
 
 #define BIT(x) (1ul << x)
@@ -813,12 +828,12 @@
 			}
 		}
 	} else if (!g->p.data_backwards || (nr + loop) & 1) {
+		/* Process data forwards: */
 
 		d0 = data + off;
 		d  = data + off + 1;
 		d1 = data + words;
 
-		/* Process data forwards: */
 		for (;;) {
 			if (unlikely(d >= d1))
 				d = data;
@@ -836,7 +851,6 @@
 		d  = data + off - 1;
 		d1 = data + words;
 
-		/* Process data forwards: */
 		for (;;) {
 			if (unlikely(d < data))
 				d = data + words-1;
@@ -1135,15 +1149,18 @@
 	if (g->p.serialize_startup) {
 		pthread_mutex_lock(&g->startup_mutex);
 		g->nr_tasks_started++;
+		/* The last thread wakes the main process. */
+		if (g->nr_tasks_started == g->p.nr_tasks)
+			pthread_cond_signal(&g->startup_cond);
+
 		pthread_mutex_unlock(&g->startup_mutex);
 
 		/* Here we will wait for the main process to start us all at once: */
 		pthread_mutex_lock(&g->start_work_mutex);
+		g->start_work = false;
 		g->nr_tasks_working++;
-
-		/* Last one wake the main process: */
-		if (g->nr_tasks_working == g->p.nr_tasks)
-			pthread_mutex_unlock(&g->startup_done_mutex);
+		while (!g->start_work)
+			pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
 
 		pthread_mutex_unlock(&g->start_work_mutex);
 	}
@@ -1440,8 +1457,9 @@
 
 	/* Startup serialization: */
 	init_global_mutex(&g->start_work_mutex);
+	init_global_cond(&g->start_work_cond);
 	init_global_mutex(&g->startup_mutex);
-	init_global_mutex(&g->startup_done_mutex);
+	init_global_cond(&g->startup_cond);
 	init_global_mutex(&g->stop_work_mutex);
 
 	init_thread_data();
@@ -1501,9 +1519,6 @@
 	pids = zalloc(g->p.nr_proc * sizeof(*pids));
 	pid = -1;
 
-	/* All threads try to acquire it, this way we can wait for them to start up: */
-	pthread_mutex_lock(&g->start_work_mutex);
-
 	if (g->p.serialize_startup) {
 		tprintf(" #\n");
 		tprintf(" # Startup synchronization: ..."); fflush(stdout);
@@ -1525,22 +1540,29 @@
 		pids[i] = pid;
 	}
 
-	/* Wait for all the threads to start up: */
-	while (g->nr_tasks_started != g->p.nr_tasks)
-		usleep(USEC_PER_MSEC);
-
-	BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
-
 	if (g->p.serialize_startup) {
+		bool threads_ready = false;
 		double startup_sec;
 
-		pthread_mutex_lock(&g->startup_done_mutex);
+		/*
+		 * Wait for all the threads to start up. The last thread will
+		 * signal this process.
+		 */
+		pthread_mutex_lock(&g->startup_mutex);
+		while (g->nr_tasks_started != g->p.nr_tasks)
+			pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
 
-		/* This will start all threads: */
-		pthread_mutex_unlock(&g->start_work_mutex);
+		pthread_mutex_unlock(&g->startup_mutex);
 
-		/* This mutex is locked - the last started thread will wake us: */
-		pthread_mutex_lock(&g->startup_done_mutex);
+		/* Wait for all threads to be at the start_work_cond. */
+		while (!threads_ready) {
+			pthread_mutex_lock(&g->start_work_mutex);
+			threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
+			pthread_mutex_unlock(&g->start_work_mutex);
+			if (!threads_ready)
+				usleep(1);
+		}
 
 		gettimeofday(&stop, NULL);
@@ -1554,7 +1576,11 @@
 		tprintf(" #\n");
 
 		start = stop;
-		pthread_mutex_unlock(&g->startup_done_mutex);
+		/* Start all threads running. */
+		pthread_mutex_lock(&g->start_work_mutex);
+		g->start_work = true;
+		pthread_mutex_unlock(&g->start_work_mutex);
+		pthread_cond_broadcast(&g->start_work_cond);
 	} else {
 		gettimeofday(&start, NULL);
 	}
@@ -1733,12 +1759,12 @@
  */
 static const char *tests[][MAX_ARGS] = {
    /* Basic single-stream NUMA bandwidth measurements: */
-   { "RAM-bw-local,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+   { "RAM-bw-local,",     "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			"-C" , "0", "-M", "0", OPT_BW_RAM },
    { "RAM-bw-local-NOTHP,",
 			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			"-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
-   { "RAM-bw-remote,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+   { "RAM-bw-remote,",    "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			"-C" , "0", "-M", "1", OPT_BW_RAM },
 
    /* 2-stream NUMA bandwidth measurements: */
@@ -1755,7 +1781,7 @@
    { " 1x3-convergence,", "mem",  "-p",  "1", "-t",  "3", "-P",  "512", OPT_CONV },
    { " 1x4-convergence,", "mem",  "-p",  "1", "-t",  "4", "-P",  "512", OPT_CONV },
    { " 1x6-convergence,", "mem",  "-p",  "1", "-t",  "6", "-P", "1020", OPT_CONV },
-   { " 2x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
+   { " 2x3-convergence,", "mem",  "-p",  "2", "-t",  "3", "-P", "1020", OPT_CONV },
    { " 3x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
    { " 4x4-convergence,", "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV },
    { " 4x4-convergence-NOTHP,",
@@ -1780,24 +1806,24 @@
"mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
    { "16x1-bw-process,",  "mem",  "-p", "16",  "-t",  "1",  "-P",  "256", OPT_BW },
 
-   { " 4x1-bw-thread,",   "mem",  "-p",  "1",  "-t",  "4",  "-T",  "256", OPT_BW },
-   { " 8x1-bw-thread,",   "mem",  "-p",  "1",  "-t",  "8",  "-T",  "256", OPT_BW },
-   { "16x1-bw-thread,",   "mem",  "-p",  "1",  "-t", "16",  "-T",  "128", OPT_BW },
-   { "32x1-bw-thread,",   "mem",  "-p",  "1",  "-t", "32",  "-T",   "64", OPT_BW },
+   { " 1x4-bw-thread,",   "mem",  "-p",  "1",  "-t",  "4",  "-T",  "256", OPT_BW },
+   { " 1x8-bw-thread,",   "mem",  "-p",  "1",  "-t",  "8",  "-T",  "256", OPT_BW },
+   { "1x16-bw-thread,",   "mem",  "-p",  "1",  "-t", "16",  "-T",  "128", OPT_BW },
+   { "1x32-bw-thread,",   "mem",  "-p",  "1",  "-t", "32",  "-T",   "64", OPT_BW },
 
-   { " 2x3-bw-thread,",   "mem",  "-p",  "2",  "-t",  "3",  "-P",  "512", OPT_BW },
-   { " 4x4-bw-thread,",   "mem",  "-p",  "4",  "-t",  "4",  "-P",  "512", OPT_BW },
-   { " 4x6-bw-thread,",   "mem",  "-p",  "4",  "-t",  "6",  "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread,",   "mem",  "-p",  "4",  "-t",  "8",  "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread-NOTHP,",
+   { " 2x3-bw-process,",  "mem",  "-p",  "2",  "-t",  "3",  "-P",  "512", OPT_BW },
+   { " 4x4-bw-process,",  "mem",  "-p",  "4",  "-t",  "4",  "-P",  "512", OPT_BW },
+   { " 4x6-bw-process,",  "mem",  "-p",  "4",  "-t",  "6",  "-P",  "512", OPT_BW },
+   { " 4x8-bw-process,",  "mem",  "-p",  "4",  "-t",  "8",  "-P",  "512", OPT_BW },
+   { " 4x8-bw-process-NOTHP,",
 			"mem",  "-p",  "4",  "-t",  "8",  "-P",  "512", OPT_BW_NOTHP },
-   { " 3x3-bw-thread,",   "mem",  "-p",  "3",  "-t",  "3",  "-P",  "512", OPT_BW },
-   { " 5x5-bw-thread,",   "mem",  "-p",  "5",  "-t",  "5",  "-P",  "512", OPT_BW },
+   { " 3x3-bw-process,",  "mem",  "-p",  "3",  "-t",  "3",  "-P",  "512", OPT_BW },
+   { " 5x5-bw-process,",  "mem",  "-p",  "5",  "-t",  "5",  "-P",  "512", OPT_BW },
 
-   { "2x16-bw-thread,",   "mem",  "-p",  "2",  "-t", "16",  "-P",  "512", OPT_BW },
-   { "1x32-bw-thread,",   "mem",  "-p",  "1",  "-t", "32",  "-P", "2048", OPT_BW },
+   { "2x16-bw-process,",  "mem",  "-p",  "2",  "-t", "16",  "-P",  "512", OPT_BW },
+   { "1x32-bw-process,",  "mem",  "-p",  "1",  "-t", "32",  "-P", "2048", OPT_BW },
 
-   { "numa02-bw,",	  "mem",  "-p",  "1",  "-t", "32",  "-T",   "32", OPT_BW },
+   { "numa02-bw,",        "mem",  "-p",  "1",  "-t", "32",  "-T",   "32", OPT_BW },
    { "numa02-bw-NOTHP,",  "mem",  "-p",  "1",  "-t", "32",  "-T",   "32", OPT_BW_NOTHP },
    { "numa01-bw-thread,", "mem",  "-p",  "2",  "-t", "16",  "-T",  "192", OPT_BW },
    { "numa01-bw-thread-NOTHP,",