Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index b8c9744..251a9af 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -17,10 +17,12 @@
int sysctl_panic_on_rcu_stall __read_mostly;
#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
-#define RCU_STALL_DELAY_DELTA 0
+#define RCU_STALL_DELAY_DELTA 0
#endif
+#define RCU_STALL_MIGHT_DIV 8
+#define RCU_STALL_MIGHT_MIN (2 * HZ)
/* Limit-check stall timeouts specified at boottime and runtime. */
int rcu_jiffies_till_stall_check(void)
@@ -42,6 +44,36 @@
}
EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
+/**
+ * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
+ *
+ * Returns @true if the current grace period is sufficiently old that
+ * it is reasonable to assume that it might be stalled. This can be
+ * useful when deciding whether to allocate memory to enable RCU-mediated
+ * freeing on the one hand or just invoking synchronize_rcu() on the other.
+ * The latter is preferable when the grace period is stalled.
+ *
+ * "Sufficiently old" means that the value of jiffies sampled on entry is
+ * no longer before .gp_start plus a threshold.  The threshold is
+ * rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV jiffies, raised to
+ * RCU_STALL_MIGHT_MIN if it would otherwise be smaller.  Returns @false
+ * whenever rcu_gp_in_progress() reports that no grace period is in progress.
+ *
+ * Note that sampling of the .gp_start and .gp_seq fields must be done
+ * carefully to avoid false positives at the beginnings and ends of
+ * grace periods.
+ */
+bool rcu_gp_might_be_stalled(void)
+{
+ unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
+ unsigned long j = jiffies; // Sampled before the in-progress check below.
+
+ if (d < RCU_STALL_MIGHT_MIN)
+ d = RCU_STALL_MIGHT_MIN; // Enforce the minimum age threshold.
+ smp_mb(); // jiffies before .gp_seq to avoid false positives.
+ if (!rcu_gp_in_progress())
+ return false;
+ // Long delays at this point avoid false positives, but a delay
+ // of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
+ smp_mb(); // .gp_seq before second .gp_start
+ // And ditto here.
+ return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
+}
+
/* Don't do RCU CPU stall warnings during long sysrq printouts. */
void rcu_sysrq_start(void)
{
@@ -104,10 +136,10 @@
unsigned long j = jiffies;
unsigned long j1;
- rcu_state.gp_start = j;
+ WRITE_ONCE(rcu_state.gp_start, j);
j1 = rcu_jiffies_till_stall_check();
- /* Record ->gp_start before ->jiffies_stall. */
- smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
+ smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
+ WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
rcu_state.jiffies_resched = j + j1 / 2;
rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
}
@@ -128,7 +160,7 @@
{
unsigned long j;
- if (!rcu_kick_kthreads)
+ if (!READ_ONCE(rcu_kick_kthreads))
return;
j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
if (time_after(jiffies, j) && rcu_state.gp_kthread &&
@@ -165,7 +197,7 @@
//
// Printing RCU CPU stall warnings
-#ifdef CONFIG_PREEMPTION
+#ifdef CONFIG_PREEMPT_RCU
/*
* Dump detailed information for all tasks blocking the current RCU
@@ -194,30 +226,78 @@
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+// Communicate task state back to the RCU CPU stall warning request.
+struct rcu_stall_chk_rdr {
+ int nesting; // Copy of the task's ->rcu_read_lock_nesting.
+ union rcu_special rs; // Copy of the task's ->rcu_read_unlock_special.
+ bool on_blkd_list; // True if the task's ->rcu_node_entry list was non-empty.
+};
+
+/*
+ * Report out the state of a not-running task that is stalling the
+ * current RCU grace period.
+ *
+ * @t is the task to inspect and @arg points to the struct
+ * rcu_stall_chk_rdr that receives the snapshot.  If task_curr() says
+ * that @t is running, nothing is written to @arg and false is returned.
+ * Otherwise the task's ->rcu_read_lock_nesting, ->rcu_read_unlock_special
+ * and the non-emptiness of its ->rcu_node_entry are recorded and true
+ * is returned.
+ *
+ * This is the callback handed to try_invoke_on_locked_down_task() by
+ * rcu_print_task_stall().
+ */
+static bool check_slow_task(struct task_struct *t, void *arg)
+{
+ struct rcu_stall_chk_rdr *rscrp = arg;
+
+ if (task_curr(t))
+ return false; // It is running, so decline to inspect it.
+ rscrp->nesting = t->rcu_read_lock_nesting;
+ rscrp->rs = t->rcu_read_unlock_special;
+ rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry);
+ return true;
+}
+
/*
* Scan the current list of tasks blocked within RCU read-side critical
- * sections, printing out the tid of each.
+ * sections, printing out the tid of each of the first few of them.
*/
-static int rcu_print_task_stall(struct rcu_node *rnp)
+static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
+ __releases(rnp->lock)
{
- struct task_struct *t;
+ int i = 0;
int ndetected = 0;
+ struct rcu_stall_chk_rdr rscr;
+ struct task_struct *t;
+ struct task_struct *ts[8];
- if (!rcu_preempt_blocked_readers_cgp(rnp))
+ lockdep_assert_irqs_disabled();
+ if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
+ }
pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
rnp->level, rnp->grplo, rnp->grphi);
t = list_entry(rnp->gp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
- pr_cont(" P%d", t->pid);
+ get_task_struct(t);
+ ts[i++] = t;
+ if (i >= ARRAY_SIZE(ts))
+ break;
+ }
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ while (i) {
+ t = ts[--i];
+ if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
+ pr_cont(" P%d", t->pid);
+ else
+ pr_cont(" P%d/%d:%c%c%c%c",
+ t->pid, rscr.nesting,
+ ".b"[rscr.rs.b.blocked],
+ ".q"[rscr.rs.b.need_qs],
+ ".e"[rscr.rs.b.exp_hint],
+ ".l"[rscr.on_blkd_list]);
+ lockdep_assert_irqs_disabled();
+ put_task_struct(t);
ndetected++;
}
pr_cont("\n");
return ndetected;
}
-#else /* #ifdef CONFIG_PREEMPTION */
+#else /* #ifdef CONFIG_PREEMPT_RCU */
/*
* Because preemptible RCU does not exist, we never have to check for
@@ -231,11 +311,12 @@
* Because preemptible RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
*/
-static int rcu_print_task_stall(struct rcu_node *rnp)
+static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
{
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
}
-#endif /* #else #ifdef CONFIG_PREEMPTION */
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
/*
* Dump stacks of all tasks running on stalled CPUs. First try using
@@ -265,11 +346,9 @@
{
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
- sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c",
+ sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d",
rdp->last_accelerate & 0xffff, jiffies & 0xffff,
- ".l"[rdp->all_lazy],
- ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)],
- ".D"[!!rdp->tick_nohz_enabled_snap]);
+ !!rdp->tick_nohz_enabled_snap);
}
#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
@@ -281,6 +360,38 @@
#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
+static const char * const gp_state_names[] = { // Printable names, indexed by RCU_GP_* value.
+ [RCU_GP_IDLE] = "RCU_GP_IDLE",
+ [RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS",
+ [RCU_GP_DONE_GPS] = "RCU_GP_DONE_GPS",
+ [RCU_GP_ONOFF] = "RCU_GP_ONOFF",
+ [RCU_GP_INIT] = "RCU_GP_INIT",
+ [RCU_GP_WAIT_FQS] = "RCU_GP_WAIT_FQS",
+ [RCU_GP_DOING_FQS] = "RCU_GP_DOING_FQS",
+ [RCU_GP_CLEANUP] = "RCU_GP_CLEANUP",
+ [RCU_GP_CLEANED] = "RCU_GP_CLEANED",
+};
+
+/*
+ * Convert a ->gp_state value to a character string.
+ *
+ * Returns the gp_state_names[] entry at index @gs, or the literal
+ * "???" when @gs is negative or lies beyond the end of that table.
+ */
+static const char *gp_state_getname(short gs)
+{
+ if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
+ return "???"; // Out-of-range state: no name available.
+ return gp_state_names[gs];
+}
+
+/*
+ * Is the RCU grace-period kthread being starved of CPU time?
+ *
+ * Computes the number of jiffies elapsed since rcu_state.gp_activity
+ * was last recorded.  If @jp is non-NULL, that elapsed time is stored
+ * through it; callers that only want the verdict may pass NULL.
+ * Returns true when the elapsed time exceeds 2 * HZ jiffies.
+ */
+static bool rcu_is_gp_kthread_starving(unsigned long *jp)
+{
+ unsigned long j = jiffies - READ_ONCE(rcu_state.gp_activity);
+
+ if (jp)
+ *jp = j; // Hand the elapsed time back to the caller.
+ return j > 2 * HZ;
+}
+
/*
* Print out diagnostic information for the specified stalled CPU.
*
@@ -295,6 +406,7 @@
static void print_cpu_stall_info(int cpu)
{
unsigned long delta;
+ bool falsepositive;
char fast_no_hz[72];
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
char *ticks_title;
@@ -315,7 +427,9 @@
}
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
- pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
+ falsepositive = rcu_is_gp_kthread_starving(NULL) &&
+ rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
+ pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n",
cpu,
"O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
@@ -327,8 +441,9 @@
rcu_dynticks_snap(rdp) & 0xfff,
rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
- READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
- fast_no_hz);
+ data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
+ fast_no_hz,
+ falsepositive ? " (false positive?)" : "");
}
/* Complain about starvation of grace-period kthread. */
@@ -337,15 +452,15 @@
struct task_struct *gpk = rcu_state.gp_kthread;
unsigned long j;
- j = jiffies - READ_ONCE(rcu_state.gp_activity);
- if (j > 2 * HZ) {
+ if (rcu_is_gp_kthread_starving(&j)) {
pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
rcu_state.name, j,
(long)rcu_seq_current(&rcu_state.gp_seq),
- READ_ONCE(rcu_state.gp_flags),
+ data_race(rcu_state.gp_flags),
gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
if (gpk) {
+ pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
pr_err("RCU grace-period kthread stack dump:\n");
sched_show_task(gpk);
wake_up_process(gpk);
@@ -353,7 +468,7 @@
}
}
-static void print_other_cpu_stall(unsigned long gp_seq)
+static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
{
int cpu;
unsigned long flags;
@@ -363,20 +478,21 @@
struct rcu_node *rnp;
long totqlen = 0;
+ lockdep_assert_irqs_disabled();
+
/* Kick and suppress, if so configured. */
rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
+ if (rcu_stall_is_suppressed())
return;
/*
* OK, time to rat on our buddy...
- * See Documentation/RCU/stallwarn.txt for info on how to debug
+ * See Documentation/RCU/stallwarn.rst for info on how to debug
* RCU CPU stall warnings.
*/
pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
rcu_for_each_leaf_node(rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
- ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for_each_leaf_node_possible_cpu(rnp, cpu)
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
@@ -384,13 +500,14 @@
ndetected++;
}
}
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock.
+ lockdep_assert_irqs_disabled();
}
for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
- smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
+ smp_processor_id(), (long)(jiffies - gps),
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
if (ndetected) {
rcu_dump_cpu_stacks();
@@ -403,13 +520,11 @@
pr_err("INFO: Stall ended before state dump start\n");
} else {
j = jiffies;
- gpa = READ_ONCE(rcu_state.gp_activity);
+ gpa = data_race(rcu_state.gp_activity);
pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
rcu_state.name, j - gpa, j, gpa,
- READ_ONCE(jiffies_till_next_fqs),
+ data_race(jiffies_till_next_fqs),
rcu_get_root()->qsmask);
- /* In this case, the current CPU might be at fault. */
- sched_show_task(current);
}
}
/* Rewrite if needed in case of slow consoles. */
@@ -424,7 +539,7 @@
rcu_force_quiescent_state(); /* Kick them all. */
}
-static void print_cpu_stall(void)
+static void print_cpu_stall(unsigned long gps)
{
int cpu;
unsigned long flags;
@@ -432,14 +547,16 @@
struct rcu_node *rnp = rcu_get_root();
long totqlen = 0;
+ lockdep_assert_irqs_disabled();
+
/* Kick and suppress, if so configured. */
rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
+ if (rcu_stall_is_suppressed())
return;
/*
* OK, time to rat on ourselves...
- * See Documentation/RCU/stallwarn.txt for info on how to debug
+ * See Documentation/RCU/stallwarn.rst for info on how to debug
* RCU CPU stall warnings.
*/
pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
@@ -449,7 +566,7 @@
for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
- jiffies - rcu_state.gp_start,
+ jiffies - gps,
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
rcu_check_gp_kthread_starvation();
@@ -486,7 +603,8 @@
unsigned long js;
struct rcu_node *rnp;
- if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
+ lockdep_assert_irqs_disabled();
+ if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) ||
!rcu_gp_in_progress())
return;
rcu_stall_kick_kthreads();
@@ -536,8 +654,8 @@
return;
/* We haven't checked in, so go dump stack. */
- print_cpu_stall();
- if (rcu_cpu_stall_ftrace_dump)
+ print_cpu_stall(gps);
+ if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
rcu_ftrace_dump(DUMP_ALL);
} else if (rcu_gp_in_progress() &&
@@ -553,8 +671,8 @@
return;
/* They had a few time units to dump stack, so complain. */
- print_other_cpu_stall(gs2);
- if (rcu_cpu_stall_ftrace_dump)
+ print_other_cpu_stall(gs2, gps);
+ if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
rcu_ftrace_dump(DUMP_ALL);
}
}
@@ -569,6 +687,7 @@
*/
void show_rcu_gp_kthreads(void)
{
+ unsigned long cbs = 0;
int cpu;
unsigned long j;
unsigned long ja;
@@ -576,43 +695,46 @@
unsigned long jw;
struct rcu_data *rdp;
struct rcu_node *rnp;
+ struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
j = jiffies;
- ja = j - READ_ONCE(rcu_state.gp_activity);
- jr = j - READ_ONCE(rcu_state.gp_req_activity);
- jw = j - READ_ONCE(rcu_state.gp_wake_time);
+ ja = j - data_race(rcu_state.gp_activity);
+ jr = j - data_race(rcu_state.gp_req_activity);
+ jw = j - data_race(rcu_state.gp_wake_time);
pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
rcu_state.name, gp_state_getname(rcu_state.gp_state),
- rcu_state.gp_state,
- rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
- ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
- (long)READ_ONCE(rcu_state.gp_seq),
- (long)READ_ONCE(rcu_get_root()->gp_seq_needed),
- READ_ONCE(rcu_state.gp_flags));
+ rcu_state.gp_state, t ? t->state : 0x1ffffL,
+ ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
+ (long)data_race(rcu_state.gp_seq),
+ (long)data_race(rcu_get_root()->gp_seq_needed),
+ data_race(rcu_state.gp_flags));
rcu_for_each_node_breadth_first(rnp) {
- if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
+ if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
+ READ_ONCE(rnp->gp_seq_needed)))
continue;
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
- rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
- (long)rnp->gp_seq_needed);
+ rnp->grplo, rnp->grphi, (long)data_race(rnp->gp_seq),
+ (long)data_race(rnp->gp_seq_needed));
if (!rcu_is_leaf_node(rnp))
continue;
for_each_leaf_node_possible_cpu(rnp, cpu) {
rdp = per_cpu_ptr(&rcu_data, cpu);
- if (rdp->gpwrap ||
- ULONG_CMP_GE(rcu_state.gp_seq,
- rdp->gp_seq_needed))
+ if (READ_ONCE(rdp->gpwrap) ||
+ ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
+ READ_ONCE(rdp->gp_seq_needed)))
continue;
pr_info("\tcpu %d ->gp_seq_needed %ld\n",
- cpu, (long)rdp->gp_seq_needed);
+ cpu, (long)data_race(rdp->gp_seq_needed));
}
}
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(&rcu_data, cpu);
+ cbs += data_race(rdp->n_cbs_invoked);
if (rcu_segcblist_is_offloaded(&rdp->cblist))
show_rcu_nocb_state(rdp);
}
- /* sched_show_task(rcu_state.gp_kthread); */
+ pr_info("RCU callbacks invoked since boot: %lu\n", cbs);
+ show_rcu_tasks_gp_kthreads();
}
EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
@@ -629,7 +751,9 @@
static atomic_t warned = ATOMIC_INIT(0);
if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
+ ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq),
+ READ_ONCE(rnp_root->gp_seq_needed)) ||
+ !smp_load_acquire(&rcu_state.gp_kthread)) // Get stable kthread.
return;
j = jiffies; /* Expensive access, and in common case don't get here. */
if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
@@ -640,7 +764,8 @@
raw_spin_lock_irqsave_rcu_node(rnp, flags);
j = jiffies;
if (rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
+ ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq),
+ READ_ONCE(rnp_root->gp_seq_needed)) ||
time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
atomic_read(&warned)) {
@@ -653,9 +778,10 @@
raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
j = jiffies;
if (rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
- time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
- time_before(j, rcu_state.gp_activity + gpssdelay) ||
+ ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq),
+ READ_ONCE(rnp_root->gp_seq_needed)) ||
+ time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
+ time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
atomic_xchg(&warned, 1)) {
if (rnp_root != rnp)
/* irqs remain disabled. */
@@ -722,7 +848,7 @@
show_rcu_gp_kthreads();
}
-static struct sysrq_key_op sysrq_rcudump_op = {
+static const struct sysrq_key_op sysrq_rcudump_op = {
.handler = sysrq_show_rcu,
.help_msg = "show-rcu(y)",
.action_msg = "Show RCU tree",