Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 61c1fad..ea6adbf 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SMP support for ppc.
*
@@ -8,11 +9,6 @@
*
* PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
* Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
@@ -20,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
@@ -34,6 +31,8 @@
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -73,15 +72,33 @@
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
-struct thread_info *secondary_ti;
+struct task_struct *secondary_current;
+bool has_big_cores;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+#define MAX_THREAD_LIST_SIZE 8
+#define THREAD_GROUP_SHARE_L1 1
+struct thread_groups {
+ unsigned int property;
+ unsigned int nr_groups;
+ unsigned int threads_per_group;
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/*
+ * On big-cores system, cpu_l1_cache_map for each CPU corresponds to
+ * the set its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
@@ -338,13 +355,12 @@
* NMI IPIs may not be recoverable, so should not be used as ongoing part of
* a running system. They can be used for crash, debug, halt/reboot, etc.
*
- * NMI IPIs are globally single threaded. No more than one in progress at
- * any time.
- *
* The IPI call waits with interrupts disabled until all targets enter the
- * NMI handler, then the call returns.
+ * NMI handler, then returns. Subsequent IPIs can be issued before targets
+ * have returned from their handlers, so there is no guarantee about
+ * concurrency or re-entrancy.
*
- * No new NMI can be initiated until targets exit the handler.
+ * A new NMI can be issued before all targets exit the handler.
*
* The IPI call may time out without all targets entering the NMI handler.
* In that case, there is some logic to recover (and ignore subsequent
@@ -355,7 +371,7 @@
static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
-static int nmi_ipi_busy_count = 0;
+static bool nmi_ipi_busy = false;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
static void nmi_ipi_lock_start(unsigned long *flags)
@@ -394,7 +410,7 @@
*/
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
- void (*fn)(struct pt_regs *);
+ void (*fn)(struct pt_regs *) = NULL;
unsigned long flags;
int me = raw_smp_processor_id();
int ret = 0;
@@ -405,29 +421,17 @@
* because the caller may have timed out.
*/
nmi_ipi_lock_start(&flags);
- if (!nmi_ipi_busy_count)
- goto out;
- if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
- goto out;
-
- fn = nmi_ipi_function;
- if (!fn)
- goto out;
-
- cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
- nmi_ipi_busy_count++;
- nmi_ipi_unlock();
-
- ret = 1;
-
- fn(regs);
-
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1) /* Can race with caller time-out */
- nmi_ipi_busy_count--;
-out:
+ if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ fn = READ_ONCE(nmi_ipi_function);
+ WARN_ON_ONCE(!fn);
+ ret = 1;
+ }
nmi_ipi_unlock_end(&flags);
+ if (fn)
+ fn(regs);
+
return ret;
}
@@ -453,9 +457,10 @@
* - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
* - fn is the target callback function.
* - delay_us > 0 is the delay before giving up waiting for targets to
- * complete executing the handler, == 0 specifies indefinite delay.
+ * begin executing the handler, == 0 specifies indefinite delay.
*/
-int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
+ u64 delay_us, bool safe)
{
unsigned long flags;
int me = raw_smp_processor_id();
@@ -467,31 +472,33 @@
if (unlikely(!smp_ops))
return 0;
- /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
nmi_ipi_lock_start(&flags);
- while (nmi_ipi_busy_count) {
+ while (nmi_ipi_busy) {
nmi_ipi_unlock_end(&flags);
- spin_until_cond(nmi_ipi_busy_count == 0);
+ spin_until_cond(!nmi_ipi_busy);
nmi_ipi_lock_start(&flags);
}
-
+ nmi_ipi_busy = true;
nmi_ipi_function = fn;
+ WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
+
if (cpu < 0) {
/* ALL_OTHERS */
cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
} else {
- /* cpumask starts clear */
cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
}
- nmi_ipi_busy_count++;
+
nmi_ipi_unlock();
+ /* Interrupts remain hard disabled */
+
do_smp_send_nmi_ipi(cpu, safe);
nmi_ipi_lock();
- /* nmi_ipi_busy_count is held here, so unlock/lock is okay */
+ /* nmi_ipi_busy is set here, so unlock/lock is okay */
while (!cpumask_empty(&nmi_ipi_pending_mask)) {
nmi_ipi_unlock();
udelay(1);
@@ -503,29 +510,15 @@
}
}
- while (nmi_ipi_busy_count > 1) {
- nmi_ipi_unlock();
- udelay(1);
- nmi_ipi_lock();
- if (delay_us) {
- delay_us--;
- if (!delay_us)
- break;
- }
- }
-
if (!cpumask_empty(&nmi_ipi_pending_mask)) {
/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
ret = 0;
cpumask_clear(&nmi_ipi_pending_mask);
}
- if (nmi_ipi_busy_count > 1) {
- /* Timeout waiting for CPUs to execute fn */
- ret = 0;
- nmi_ipi_busy_count = 1;
- }
- nmi_ipi_busy_count--;
+ nmi_ipi_function = NULL;
+ nmi_ipi_busy = false;
+
nmi_ipi_unlock_end(&flags);
return ret;
@@ -593,17 +586,8 @@
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
- * This is a special case because it never returns, so the NMI IPI
- * handling would never mark it as done, which makes any later
- * smp_send_nmi_ipi() call spin forever. Mark it done now.
- *
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
*/
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1)
- nmi_ipi_busy_count--;
- nmi_ipi_unlock();
-
spin_begin();
while (1)
spin_cpu_relax();
@@ -643,7 +627,7 @@
}
#endif /* CONFIG_NMI_IPI */
-struct thread_info *current_set[NR_CPUS];
+struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
@@ -674,6 +658,185 @@
}
#endif
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ * property for the CPU device node @dn and stores
+ * the parsed output in the thread_groups
+ * structure @tg if the ibm,thread-groups[0]
+ * matches @property.
+ *
+ * @dn: The device node of the CPU device.
+ * @tg: Pointer to a thread group structure into which the parsed
+ * output of "ibm,thread-groups" is stored.
+ * @property: The property of the thread-group that the caller is
+ * interested in.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * ibm,thread-groups[0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share L1, translation cache.
+ *
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
+ *
+ * ibm,thread-groups[2] tells us the number of threads in each such
+ * group.
+ *
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
+ * implies that there are 2 groups of 4 threads each, where each group
+ * of threads share L1, translation cache.
+ *
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
+ * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
+ * 11, 12} structure
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+ struct thread_groups *tg,
+ unsigned int property)
+{
+ int i;
+ u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
+ u32 *thread_list;
+ size_t total_threads;
+ int ret;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array, 3);
+ if (ret)
+ return ret;
+
+ tg->property = thread_group_array[0];
+ tg->nr_groups = thread_group_array[1];
+ tg->threads_per_group = thread_group_array[2];
+ if (tg->property != property ||
+ tg->nr_groups < 1 ||
+ tg->threads_per_group < 1)
+ return -ENODATA;
+
+ total_threads = tg->nr_groups * tg->threads_per_group;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array,
+ 3 + total_threads);
+ if (ret)
+ return ret;
+
+ thread_list = &thread_group_array[3];
+
+ for (i = 0 ; i < total_threads; i++)
+ tg->thread_list[i] = thread_list[i];
+
+ return 0;
+}
+
+/*
+ * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
+ * that @cpu belongs to.
+ *
+ * @cpu : The logical CPU whose thread group is being searched.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ * to.
+ *
+ * Returns the index to tg->thread_list that points to the the start
+ * of the thread_group that @cpu belongs to.
+ *
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
+ * tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
+ int i, j;
+
+ for (i = 0; i < tg->nr_groups; i++) {
+ int group_start = i * tg->threads_per_group;
+
+ for (j = 0; j < tg->threads_per_group; j++) {
+ int idx = group_start + j;
+
+ if (tg->thread_list[idx] == hw_cpu_id)
+ return group_start;
+ }
+ }
+
+ return -1;
+}
+
+static int init_cpu_l1_cache_map(int cpu)
+
+{
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
+ struct thread_groups tg = {.property = 0,
+ .nr_groups = 0,
+ .threads_per_group = 0};
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int i, cpu_group_start = -1, err = 0;
+
+ if (!dn)
+ return -ENODATA;
+
+ err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
+ if (err)
+ goto out;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+
+ cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+
+ if (unlikely(cpu_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ int i_group_start = get_cpu_thread_group_start(i, &tg);
+
+ if (unlikely(i_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ if (i_group_start == cpu_group_start)
+ cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+ }
+
+out:
+ of_node_put(dn);
+ return err;
+}
+
+static int init_big_cores(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_cpu_l1_cache_map(cpu);
+
+ if (err)
+ return err;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ }
+
+ has_big_cores = true;
+ return 0;
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
@@ -712,6 +875,12 @@
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+ init_big_cores();
+ if (has_big_cores) {
+ cpumask_set_cpu(boot_cpuid,
+ cpu_smallcore_mask(boot_cpuid));
+ }
+
if (smp_ops && smp_ops->probe)
smp_ops->probe();
}
@@ -723,7 +892,7 @@
paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
- current_set[boot_cpuid] = task_thread_info(current);
+ current_set[boot_cpuid] = current;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -808,14 +977,13 @@
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
- struct thread_info *ti = task_thread_info(idle);
-
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
- paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
+ THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
- ti->cpu = cpu;
- secondary_ti = current_set[cpu] = ti;
+ idle->cpu = cpu;
+ secondary_current = current_set[cpu] = idle;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
@@ -995,10 +1163,28 @@
set_cpus_unrelated(cpu, i, cpu_core_mask);
set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ if (has_big_cores)
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
}
}
#endif
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+ struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
+ int i, first_thread = cpu_first_thread_sibling(cpu);
+
+ if (!has_big_cores)
+ return;
+
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
+ }
+}
+
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
@@ -1015,6 +1201,7 @@
if (cpu_online(i))
set_cpus_related(i, cpu, cpu_sibling_mask);
+ add_cpu_to_smallcore_masks(cpu);
/*
* Copy the thread sibling mask into the cache sibling mask
* and mark any CPUs that share an L2 with this CPU.
@@ -1044,6 +1231,7 @@
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
mmgrab(&init_mm);
current->active_mm = &init_mm;
@@ -1069,11 +1257,13 @@
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
/*
* Check for any shared caches. Note that this must be done on a
* per-core basis because one core in the pair might be disabled.
*/
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+ if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
shared_caches = true;
set_numa_node(numa_cpu_lookup_table[cpu]);
@@ -1083,6 +1273,8 @@
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+ boot_init_stack_canary();
+
local_irq_enable();
/* We can enable ftrace for secondary cpus now */
@@ -1140,6 +1332,13 @@
return cpu_l2_cache_mask(cpu);
}
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
static struct sched_domain_topology_level power9_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
@@ -1167,6 +1366,13 @@
shared_proc_topology_init();
dump_numa_cpu_topology();
+#ifdef CONFIG_SCHED_SMT
+ if (has_big_cores) {
+ pr_info("Using small cores at SMT level\n");
+ power9_topology[0].mask = smallcore_smt_mask;
+ powerpc_topology[0].mask = smallcore_smt_mask;
+ }
+#endif
/*
* If any CPU detects that it's sharing a cache with another CPU then
* use the deeper topology that is aware of this sharing.