Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index df90d4d..0dc1634 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -21,7 +21,7 @@
}
/*
- * Return then value that expedited-grace-period counter will have
+ * Return the value that the expedited-grace-period counter will have
* at the end of the current grace period.
*/
static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
@@ -39,7 +39,9 @@
}
/*
- * Take a snapshot of the expedited-grace-period counter.
+ * Take a snapshot of the expedited-grace-period counter, which is the
+ * earliest value that will indicate that a full grace period has
+ * elapsed since the current time.
*/
static unsigned long rcu_exp_gp_seq_snap(void)
{
@@ -143,31 +145,26 @@
* Return non-zero if there is no RCU expedited grace period in progress
* for the specified rcu_node structure, in other words, if all CPUs and
* tasks covered by the specified rcu_node structure have done their bit
- * for the current expedited grace period. Works only for preemptible
- * RCU -- other RCU implementation use other means.
- *
- * Caller must hold the specificed rcu_node structure's ->lock
+ * for the current expedited grace period.
*/
-static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+static bool sync_rcu_exp_done(struct rcu_node *rnp)
{
raw_lockdep_assert_held_rcu_node(rnp);
-
- return rnp->exp_tasks == NULL &&
+ return READ_ONCE(rnp->exp_tasks) == NULL &&
READ_ONCE(rnp->expmask) == 0;
}
/*
- * Like sync_rcu_preempt_exp_done(), but this function assumes the caller
- * doesn't hold the rcu_node's ->lock, and will acquire and release the lock
- * itself
+ * Like sync_rcu_exp_done(), but where the caller does not hold the
+ * rcu_node's ->lock.
*/
-static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
+static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
{
unsigned long flags;
bool ret;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
- ret = sync_rcu_preempt_exp_done(rnp);
+ ret = sync_rcu_exp_done(rnp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return ret;
@@ -181,8 +178,6 @@
* which the task was queued or to one of that rcu_node structure's ancestors,
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
- *
- * Caller must hold the specified rcu_node structure's ->lock.
*/
static void __rcu_report_exp_rnp(struct rcu_node *rnp,
bool wake, unsigned long flags)
@@ -190,8 +185,9 @@
{
unsigned long mask;
+ raw_lockdep_assert_held_rcu_node(rnp);
for (;;) {
- if (!sync_rcu_preempt_exp_done(rnp)) {
+ if (!sync_rcu_exp_done(rnp)) {
if (!rnp->expmask)
rcu_initiate_boost(rnp, flags);
else
@@ -234,7 +230,9 @@
static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
unsigned long mask, bool wake)
{
+ int cpu;
unsigned long flags;
+ struct rcu_data *rdp;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!(rnp->expmask & mask)) {
@@ -242,6 +240,13 @@
return;
}
WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
+ for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (!IS_ENABLED(CONFIG_NO_HZ_FULL) || !rdp->rcu_forced_tick_exp)
+ continue;
+ rdp->rcu_forced_tick_exp = false;
+ tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
+ }
__rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */
}
@@ -309,7 +314,7 @@
sync_exp_work_done(s));
return true;
}
- rnp->exp_seq_rq = s; /* Followers can wait on us. */
+ WRITE_ONCE(rnp->exp_seq_rq, s); /* Followers can wait on us. */
spin_unlock(&rnp->exp_lock);
trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
rnp->grplo, rnp->grphi, TPS("nxtlvl"));
@@ -345,8 +350,8 @@
/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
- unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ unsigned long mask = rdp->grpmask;
int snap;
if (raw_smp_processor_id() == cpu ||
@@ -368,13 +373,13 @@
* until such time as the ->expmask bits are cleared.
*/
if (rcu_preempt_has_tasks(rnp))
- rnp->exp_tasks = rnp->blkd_tasks.next;
+ WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* IPI the remaining CPUs for expedited quiescent state. */
for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) {
- unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ unsigned long mask = rdp->grpmask;
retry_ipi:
if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) {
@@ -382,15 +387,16 @@
continue;
}
if (get_cpu() == cpu) {
+ mask_ofl_test |= mask;
put_cpu();
continue;
}
ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
put_cpu();
- if (!ret) {
- mask_ofl_ipi &= ~mask;
+ /* The CPU will report the QS in response to the IPI. */
+ if (!ret)
continue;
- }
+
/* Failed, raced with CPU hotplug operation. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if ((rnp->qsmaskinitnext & mask) &&
@@ -398,16 +404,15 @@
/* Online, so delay for a bit and try again. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("selectofl"));
- schedule_timeout_uninterruptible(1);
+ schedule_timeout_idle(1);
goto retry_ipi;
}
- /* CPU really is offline, so we can ignore it. */
- if (!(rnp->expmask & mask))
- mask_ofl_ipi &= ~mask;
+ /* CPU really is offline, so we must report its QS. */
+ if (rnp->expmask & mask)
+ mask_ofl_test |= mask;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/* Report quiescent states for those that went offline. */
- mask_ofl_test |= mask_ofl_ipi;
if (mask_ofl_test)
rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
}
@@ -454,30 +459,67 @@
flush_work(&rnp->rew.rew_work);
}
-static void synchronize_sched_expedited_wait(void)
+/*
+ * Wait for the expedited grace period to elapse, within time limit.
+ * If the time limit is exceeded without the grace period elapsing,
+ * return false, otherwise return true.
+ */
+static bool synchronize_rcu_expedited_wait_once(long tlimit)
+{
+ int t;
+ struct rcu_node *rnp_root = rcu_get_root();
+
+ t = swait_event_timeout_exclusive(rcu_state.expedited_wq,
+ sync_rcu_exp_done_unlocked(rnp_root),
+ tlimit);
+ // Workqueues should not be signaled.
+ if (t > 0 || sync_rcu_exp_done_unlocked(rnp_root))
+ return true;
+ WARN_ON(t < 0); /* workqueues should not be signaled. */
+ return false;
+}
+
+/*
+ * Wait for the expedited grace period to elapse, issuing any needed
+ * RCU CPU stall warnings along the way.
+ */
+static void synchronize_rcu_expedited_wait(void)
{
int cpu;
+ unsigned long j;
unsigned long jiffies_stall;
unsigned long jiffies_start;
unsigned long mask;
int ndetected;
+ struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root();
- int ret;
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
jiffies_stall = rcu_jiffies_till_stall_check();
jiffies_start = jiffies;
+ if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
+ if (synchronize_rcu_expedited_wait_once(1))
+ return;
+ rcu_for_each_leaf_node(rnp) {
+ for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (rdp->rcu_forced_tick_exp)
+ continue;
+ rdp->rcu_forced_tick_exp = true;
+ tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
+ }
+ }
+ j = READ_ONCE(jiffies_till_first_fqs);
+ if (synchronize_rcu_expedited_wait_once(j + HZ))
+ return;
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT));
+ }
for (;;) {
- ret = swait_event_timeout_exclusive(
- rcu_state.expedited_wq,
- sync_rcu_preempt_exp_done_unlocked(rnp_root),
- jiffies_stall);
- if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
+ if (synchronize_rcu_expedited_wait_once(jiffies_stall))
return;
- WARN_ON(ret < 0); /* workqueues should not be signaled. */
- if (rcu_cpu_stall_suppress)
+ if (rcu_stall_is_suppressed())
continue;
panic_on_rcu_stall();
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
@@ -501,19 +543,19 @@
}
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
jiffies - jiffies_start, rcu_state.expedited_sequence,
- READ_ONCE(rnp_root->expmask),
- ".T"[!!rnp_root->exp_tasks]);
+ data_race(rnp_root->expmask),
+ ".T"[!!data_race(rnp_root->exp_tasks)]);
if (ndetected) {
pr_err("blocking rcu_node structures:");
rcu_for_each_node_breadth_first(rnp) {
if (rnp == rnp_root)
continue; /* printed unconditionally */
- if (sync_rcu_preempt_exp_done_unlocked(rnp))
+ if (sync_rcu_exp_done_unlocked(rnp))
continue;
pr_cont(" l=%u:%d-%d:%#lx/%c",
rnp->level, rnp->grplo, rnp->grphi,
- READ_ONCE(rnp->expmask),
- ".T"[!!rnp->exp_tasks]);
+ data_race(rnp->expmask),
+ ".T"[!!data_race(rnp->exp_tasks)]);
}
pr_cont("\n");
}
@@ -539,7 +581,7 @@
{
struct rcu_node *rnp;
- synchronize_sched_expedited_wait();
+ synchronize_rcu_expedited_wait();
// Switch over to wakeup mode, allowing the next GP to proceed.
// End the previous grace period only after acquiring the mutex
@@ -553,7 +595,7 @@
spin_lock(&rnp->exp_lock);
/* Recheck, avoid hang in case someone just arrived. */
if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
- rnp->exp_seq_rq = s;
+ WRITE_ONCE(rnp->exp_seq_rq, s);
spin_unlock(&rnp->exp_lock);
}
smp_mb(); /* All above changes before wakeup. */
@@ -598,6 +640,7 @@
*/
static void rcu_exp_handler(void *unused)
{
+ int depth = rcu_preempt_depth();
unsigned long flags;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
@@ -608,7 +651,7 @@
* critical section. If also enabled or idle, immediately
* report the quiescent state, otherwise defer.
*/
- if (!t->rcu_read_lock_nesting) {
+ if (!depth) {
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
rcu_dynticks_curr_cpu_in_eqs()) {
rcu_report_exp_rdp(rdp);
@@ -632,7 +675,7 @@
* can have caused this quiescent state to already have been
* reported, so we really do need to check ->expmask.
*/
- if (t->rcu_read_lock_nesting > 0) {
+ if (depth > 0) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
rdp->exp_deferred_qs = true;
@@ -642,33 +685,11 @@
return;
}
- /*
- * The final and least likely case is where the interrupted
- * code was just about to or just finished exiting the RCU-preempt
- * read-side critical section, and no, we can't tell which.
- * So either way, set ->deferred_qs to flag later code that
- * a quiescent state is required.
- *
- * If the CPU is fully enabled (or if some buggy RCU-preempt
- * read-side critical section is being used from idle), just
- * invoke rcu_preempt_deferred_qs() to immediately report the
- * quiescent state. We cannot use rcu_read_unlock_special()
- * because we are in an interrupt handler, which will cause that
- * function to take an early exit without doing anything.
- *
- * Otherwise, force a context switch after the CPU enables everything.
- */
- rdp->exp_deferred_qs = true;
- if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
- WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
- rcu_preempt_deferred_qs(t);
- } else {
- set_tsk_need_resched(t);
- set_preempt_need_resched();
- }
+ // Finally, negative nesting depth should not happen.
+ WARN_ON_ONCE(1);
}
-/* PREEMPT=y, so no PREEMPT=n expedited grace period to clean up after. */
+/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
static void sync_sched_exp_online_cleanup(int cpu)
{
}
@@ -680,17 +701,20 @@
*/
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
- struct task_struct *t;
+ unsigned long flags;
int ndetected = 0;
+ struct task_struct *t;
- if (!rnp->exp_tasks)
+ if (!READ_ONCE(rnp->exp_tasks))
return 0;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
pr_cont(" P%d", t->pid);
ndetected++;
}
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return ndetected;
}
@@ -709,11 +733,9 @@
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void rcu_exp_handler(void *unused)
{
- struct rcu_data *rdp;
- struct rcu_node *rnp;
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ struct rcu_node *rnp = rdp->mynode;
- rdp = this_cpu_ptr(&rcu_data);
- rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
@@ -738,7 +760,7 @@
my_cpu = get_cpu();
/* Quiescent state either not needed or already requested, leave. */
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
- __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) {
+ rdp->cpu_no_qs.b.exp) {
put_cpu();
return;
}
@@ -783,7 +805,7 @@
* implementations, it is still unfriendly to real-time workloads, so is
* thus not recommended for any sort of common-case code. In fact, if
* you are using synchronize_rcu_expedited() in a loop, please restructure
- * your code to batch your updates, and then Use a single synchronize_rcu()
+ * your code to batch your updates, and then use a single synchronize_rcu()
* instead.
*
* This has the same semantics as (but is more brutal than) synchronize_rcu().