Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a71a4a2..f5ba074 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -36,6 +36,8 @@
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
if (IS_ENABLED(CONFIG_PROVE_RCU))
pr_info("\tRCU lockdep checking is enabled.\n");
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
if (RCU_NUM_LVLS >= 4)
pr_info("\tFour(or more)-level hierarchy is enabled.\n");
if (RCU_FANOUT_LEAF != 16)
@@ -56,6 +58,8 @@
pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
if (qlowmark != DEFAULT_RCU_QLOMARK)
pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
+ if (qovld != DEFAULT_RCU_QOVLD)
+ pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
if (jiffies_till_first_fqs != ULONG_MAX)
pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
if (jiffies_till_next_fqs != ULONG_MAX)
@@ -224,7 +228,7 @@
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
}
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
- rnp->exp_tasks = &t->rcu_node_entry;
+ WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
!(rnp->qsmask & rdp->grpmask));
WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
@@ -290,8 +294,8 @@
trace_rcu_utilization(TPS("Start context switch"));
lockdep_assert_irqs_disabled();
- WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
- if (t->rcu_read_lock_nesting > 0 &&
+ WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);
+ if (rcu_preempt_depth() > 0 &&
!t->rcu_read_unlock_special.b.blocked) {
/* Possibly blocking in an RCU read-side critical section. */
@@ -329,6 +333,7 @@
rcu_qs();
if (rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
+ rcu_tasks_qs(current, preempt);
trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -343,11 +348,24 @@
return READ_ONCE(rnp->gp_tasks) != NULL;
}
-/* Bias and limit values for ->rcu_read_lock_nesting. */
-#define RCU_NEST_BIAS INT_MAX
-#define RCU_NEST_NMAX (-INT_MAX / 2)
+/* limit value for ->rcu_read_lock_nesting. */
#define RCU_NEST_PMAX (INT_MAX / 2)
+static void rcu_preempt_read_enter(void)
+{
+ current->rcu_read_lock_nesting++;
+}
+
+static int rcu_preempt_read_exit(void)
+{
+ return --current->rcu_read_lock_nesting;
+}
+
+static void rcu_preempt_depth_set(int val)
+{
+ current->rcu_read_lock_nesting = val;
+}
+
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
@@ -355,9 +373,11 @@
*/
void __rcu_read_lock(void)
{
- current->rcu_read_lock_nesting++;
+ rcu_preempt_read_enter();
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
- WARN_ON_ONCE(current->rcu_read_lock_nesting > RCU_NEST_PMAX);
+ WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
+ WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
@@ -373,21 +393,15 @@
{
struct task_struct *t = current;
- if (t->rcu_read_lock_nesting != 1) {
- --t->rcu_read_lock_nesting;
- } else {
+ if (rcu_preempt_read_exit() == 0) {
barrier(); /* critical section before exit code. */
- t->rcu_read_lock_nesting = -RCU_NEST_BIAS;
- barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
- barrier(); /* ->rcu_read_unlock_special load before assign */
- t->rcu_read_lock_nesting = 0;
}
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
- int rrln = t->rcu_read_lock_nesting;
+ int rrln = rcu_preempt_depth();
- WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX);
+ WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -444,13 +458,13 @@
local_irq_restore(flags);
return;
}
- t->rcu_read_unlock_special.b.deferred_qs = false;
+ t->rcu_read_unlock_special.s = 0;
if (special.b.need_qs) {
- rcu_qs();
- t->rcu_read_unlock_special.b.need_qs = false;
- if (!t->rcu_read_unlock_special.s && !rdp->exp_deferred_qs) {
- local_irq_restore(flags);
- return;
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+ rcu_report_qs_rdp(rdp);
+ udelay(rcu_unlock_delay);
+ } else {
+ rcu_qs();
}
}
@@ -460,17 +474,11 @@
* tasks are handled when removing the task from the
* blocked-tasks list below.
*/
- if (rdp->exp_deferred_qs) {
+ if (rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
- if (!t->rcu_read_unlock_special.s) {
- local_irq_restore(flags);
- return;
- }
- }
/* Clean up if blocked during RCU read-side critical section. */
if (special.b.blocked) {
- t->rcu_read_unlock_special.b.blocked = false;
/*
* Remove this task from the list it blocked on. The task
@@ -485,7 +493,7 @@
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
(!empty_norm || rnp->qsmask));
- empty_exp = sync_rcu_preempt_exp_done(rnp);
+ empty_exp = sync_rcu_exp_done(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
np = rcu_next_node_entry(t, rnp);
list_del_init(&t->rcu_node_entry);
@@ -495,12 +503,12 @@
if (&t->rcu_node_entry == rnp->gp_tasks)
WRITE_ONCE(rnp->gp_tasks, np);
if (&t->rcu_node_entry == rnp->exp_tasks)
- rnp->exp_tasks = np;
+ WRITE_ONCE(rnp->exp_tasks, np);
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
if (&t->rcu_node_entry == rnp->boost_tasks)
- rnp->boost_tasks = np;
+ WRITE_ONCE(rnp->boost_tasks, np);
}
/*
@@ -509,7 +517,7 @@
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
* so we must take a snapshot of the expedited state.
*/
- empty_exp_now = sync_rcu_preempt_exp_done(rnp);
+ empty_exp_now = sync_rcu_exp_done(rnp);
if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
rnp->gp_seq,
@@ -523,16 +531,17 @@
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
- /* Unboost if we were boosted. */
- if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
- rt_mutex_futex_unlock(&rnp->boost_mtx);
-
/*
* If this was the last task on the expedited lists,
* then we need to report up the rcu_node hierarchy.
*/
if (!empty_exp && empty_exp_now)
rcu_report_exp_rnp(rnp, true);
+
+ /* Unboost if we were boosted. */
+ if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
+ rt_mutex_futex_unlock(&rnp->boost_mtx);
+
} else {
local_irq_restore(flags);
}
@@ -551,7 +560,7 @@
{
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
- t->rcu_read_lock_nesting <= 0;
+ rcu_preempt_depth() == 0;
}
/*
@@ -564,16 +573,11 @@
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
unsigned long flags;
- bool couldrecurse = t->rcu_read_lock_nesting >= 0;
if (!rcu_preempt_need_deferred_qs(t))
return;
- if (couldrecurse)
- t->rcu_read_lock_nesting -= RCU_NEST_BIAS;
local_irq_save(flags);
rcu_preempt_deferred_qs_irqrestore(t, flags);
- if (couldrecurse)
- t->rcu_read_lock_nesting += RCU_NEST_BIAS;
}
/*
@@ -610,24 +614,22 @@
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
- t->rcu_read_unlock_special.b.exp_hint = false;
- exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
- (rdp->grpmask & READ_ONCE(rnp->expmask)) ||
- tick_nohz_full_cpu(rdp->cpu);
+ exp = (t->rcu_blocked_node &&
+ READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
+ (rdp->grpmask & READ_ONCE(rnp->expmask));
// Need to defer quiescent state until everything is enabled.
- if (irqs_were_disabled && use_softirq &&
- (in_interrupt() ||
- (exp && !t->rcu_read_unlock_special.b.deferred_qs))) {
- // Using softirq, safe to awaken, and we get
- // no help from enabling irqs, unlike bh/preempt.
+ if (use_softirq && (in_irq() || (exp && !irqs_were_disabled))) {
+ // Using softirq, safe to awaken, and either the
+ // wakeup is free or there is an expedited GP.
raise_softirq_irqoff(RCU_SOFTIRQ);
} else {
// Enabling BH or preempt does reschedule, so...
- // Also if no expediting or NO_HZ_FULL, slow is OK.
+ // Also if no expediting, slow is OK.
+ // Plus nohz_full CPUs eventually get tick enabled.
set_tsk_need_resched(current);
set_preempt_need_resched();
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
- !rdp->defer_qs_iw_pending && exp) {
+ !rdp->defer_qs_iw_pending && exp && cpu_online(rdp->cpu)) {
// Get scheduler to re-evaluate and call hooks.
// If !IRQ_WORK, FQS scan will eventually IPI.
init_irq_work(&rdp->defer_qs_iw,
@@ -636,11 +638,9 @@
irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
}
}
- t->rcu_read_unlock_special.b.deferred_qs = true;
local_irq_restore(flags);
return;
}
- WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false);
rcu_preempt_deferred_qs_irqrestore(t, flags);
}
@@ -648,8 +648,7 @@
* Check that the list of blocked tasks for the newly completed grace
* period is in fact empty. It is a serious bug to complete a grace
* period that still has RCU readers blocked! This function must be
- * invoked -before- updating this rnp's ->gp_seq, and the rnp's ->lock
- * must be held by the caller.
+ * invoked -before- updating this rnp's ->gp_seq.
*
* Also, if there are blocked tasks on the list, they automatically
* block the newly created grace period, so set up ->gp_tasks accordingly.
@@ -659,6 +658,7 @@
struct task_struct *t;
RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
+ raw_lockdep_assert_held_rcu_node(rnp);
if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
dump_blkd_tasks(rnp, 10);
if (rcu_preempt_has_tasks(rnp) &&
@@ -683,10 +683,11 @@
{
struct task_struct *t = current;
+ lockdep_assert_irqs_disabled();
if (user || rcu_is_cpu_rrupt_from_idle()) {
rcu_note_voluntary_context_switch(current);
}
- if (t->rcu_read_lock_nesting > 0 ||
+ if (rcu_preempt_depth() > 0 ||
(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
/* No QS, force context switch if deferred. */
if (rcu_preempt_need_deferred_qs(t)) {
@@ -696,13 +697,13 @@
} else if (rcu_preempt_need_deferred_qs(t)) {
rcu_preempt_deferred_qs(t); /* Report deferred QS. */
return;
- } else if (!t->rcu_read_lock_nesting) {
+ } else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
rcu_qs(); /* Report immediate QS. */
return;
}
/* If GP is oldish, ask for help from rcu_read_unlock_special(). */
- if (t->rcu_read_lock_nesting > 0 &&
+ if (rcu_preempt_depth() > 0 &&
__this_cpu_read(rcu_data.core_needs_qs) &&
__this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
!t->rcu_read_unlock_special.b.need_qs &&
@@ -723,11 +724,11 @@
struct task_struct *t = current;
if (unlikely(!list_empty(&current->rcu_node_entry))) {
- t->rcu_read_lock_nesting = 1;
+ rcu_preempt_depth_set(1);
barrier();
WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
- } else if (unlikely(t->rcu_read_lock_nesting)) {
- t->rcu_read_lock_nesting = 1;
+ } else if (unlikely(rcu_preempt_depth())) {
+ rcu_preempt_depth_set(1);
} else {
return;
}
@@ -752,13 +753,13 @@
raw_lockdep_assert_held_rcu_node(rnp);
pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
__func__, rnp->grplo, rnp->grphi, rnp->level,
- (long)rnp->gp_seq, (long)rnp->completedqs);
+ (long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
- __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
- rnp->exp_tasks);
+ __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
+ READ_ONCE(rnp->exp_tasks));
pr_info("%s: ->blkd_tasks", __func__);
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
@@ -780,6 +781,24 @@
#else /* #ifdef CONFIG_PREEMPT_RCU */
/*
+ * If strict grace periods are enabled, and if the calling
+ * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
+ * report that quiescent state and, if requested, spin for a bit.
+ */
+void rcu_read_unlock_strict(void)
+{
+ struct rcu_data *rdp;
+
+ if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
+ irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+ return;
+ rdp = this_cpu_ptr(&rcu_data);
+ rcu_report_qs_rdp(rdp);
+ udelay(rcu_unlock_delay);
+}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
+
+/*
* Tell them what RCU they are running.
*/
static void __init rcu_bootup_announce(void)
@@ -789,7 +808,7 @@
}
/*
- * Note a quiescent state for PREEMPT=n. Because we do not need to know
+ * Note a quiescent state for PREEMPTION=n. Because we do not need to know
* how many quiescent states passed, just if there was at least one since
* the start of the grace period, this just sets a flag. The caller must
* have disabled preemption.
@@ -839,7 +858,7 @@
EXPORT_SYMBOL_GPL(rcu_all_qs);
/*
- * Note a PREEMPT=n context switch. The caller must have disabled interrupts.
+ * Note a PREEMPTION=n context switch. The caller must have disabled interrupts.
*/
void rcu_note_context_switch(bool preempt)
{
@@ -851,8 +870,7 @@
this_cpu_write(rcu_data.rcu_urgent_qs, false);
if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
rcu_momentary_dyntick_idle();
- if (!preempt)
- rcu_tasks_qs(current);
+ rcu_tasks_qs(current, preempt);
out:
trace_rcu_utilization(TPS("End context switch"));
}
@@ -1031,20 +1049,21 @@
trace_rcu_utilization(TPS("Start boost kthread@init"));
for (;;) {
- rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
+ WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
- rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
+ rcu_wait(READ_ONCE(rnp->boost_tasks) ||
+ READ_ONCE(rnp->exp_tasks));
trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
- rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
+ WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
more2boost = rcu_boost(rnp);
if (more2boost)
spincnt++;
else
spincnt = 0;
if (spincnt > 10) {
- rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
+ WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
- schedule_timeout_interruptible(2);
+ schedule_timeout_idle(2);
trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
spincnt = 0;
}
@@ -1076,12 +1095,12 @@
(rnp->gp_tasks != NULL &&
rnp->boost_tasks == NULL &&
rnp->qsmask == 0 &&
- ULONG_CMP_GE(jiffies, rnp->boost_time))) {
+ (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
if (rnp->exp_tasks == NULL)
- rnp->boost_tasks = rnp->gp_tasks;
+ WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_wake_cond(rnp->boost_kthread_task,
- rnp->boost_kthread_status);
+ READ_ONCE(rnp->boost_kthread_status));
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
@@ -1263,10 +1282,9 @@
/*
* This code is invoked when a CPU goes idle, at which point we want
* to have the CPU do everything required for RCU so that it can enter
- * the energy-efficient dyntick-idle mode. This is handled by a
- * state machine implemented by rcu_prepare_for_idle() below.
+ * the energy-efficient dyntick-idle mode.
*
- * The following three proprocessor symbols control this state machine:
+ * The following preprocessor symbol controls this:
*
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
* to sleep in dyntick-idle mode with RCU callbacks pending. This
@@ -1275,21 +1293,15 @@
* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
* system. And if you are -that- concerned about energy efficiency,
* just power the system down and be done with it!
- * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
- * permitted to sleep in dyntick-idle mode with only lazy RCU
- * callbacks pending. Setting this too high can OOM your system.
*
- * The values below work well in practice. If future workloads require
+ * The value below works well in practice. If future workloads require
* adjustment, they can be converted into kernel config parameters, though
* making the state machine smarter might be a better option.
*/
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
-#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);
-static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
-module_param(rcu_idle_lazy_gp_delay, int, 0644);
/*
* Try to advance callbacks on the current CPU, but only if it has been
@@ -1328,8 +1340,7 @@
/*
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
* to invoke. If the CPU has callbacks, try to advance them. Tell the
- * caller to set the timeout based on whether or not there are non-lazy
- * callbacks.
+ * caller about what to set the timeout.
*
* The caller must have disabled interrupts.
*/
@@ -1355,25 +1366,18 @@
}
rdp->last_accelerate = jiffies;
- /* Request timer delay depending on laziness, and round. */
- rdp->all_lazy = !rcu_segcblist_n_nonlazy_cbs(&rdp->cblist);
- if (rdp->all_lazy) {
- dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
- } else {
- dj = round_up(rcu_idle_gp_delay + jiffies,
- rcu_idle_gp_delay) - jiffies;
- }
+ /* Request timer and round. */
+ dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
+
*nextevt = basemono + dj * TICK_NSEC;
return 0;
}
/*
- * Prepare a CPU for idle from an RCU perspective. The first major task
- * is to sense whether nohz mode has been enabled or disabled via sysfs.
- * The second major task is to check to see if a non-lazy callback has
- * arrived at a CPU that previously had only lazy callbacks. The third
- * major task is to accelerate (that is, assign grace-period numbers to)
- * any recently arrived callbacks.
+ * Prepare a CPU for idle from an RCU perspective. The first major task is to
+ * sense whether nohz mode has been enabled or disabled via sysfs. The second
+ * major task is to accelerate (that is, assign grace-period numbers to) any
+ * recently arrived callbacks.
*
* The caller must have disabled interrupts.
*/
@@ -1400,17 +1404,6 @@
return;
/*
- * If a non-lazy callback arrived at a CPU having only lazy
- * callbacks, invoke RCU core for the side-effect of recalculating
- * idle duration on re-entry to idle.
- */
- if (rdp->all_lazy && rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)) {
- rdp->all_lazy = false;
- invoke_rcu_core();
- return;
- }
-
- /*
* If we have not yet accelerated this jiffy, accelerate all
* callbacks on this CPU.
*/
@@ -1511,6 +1504,7 @@
* flag the contention.
*/
static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
+ __acquires(&rdp->nocb_bypass_lock)
{
lockdep_assert_irqs_disabled();
if (raw_spin_trylock(&rdp->nocb_bypass_lock))
@@ -1554,6 +1548,7 @@
* Release the specified rcu_data structure's ->nocb_bypass_lock.
*/
static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
+ __releases(&rdp->nocb_bypass_lock)
{
lockdep_assert_irqs_disabled();
raw_spin_unlock(&rdp->nocb_bypass_lock);
@@ -1602,8 +1597,7 @@
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
lockdep_assert_irqs_disabled();
- if (rcu_segcblist_is_offloaded(&rdp->cblist) &&
- cpu_online(rdp->cpu))
+ if (rcu_segcblist_is_offloaded(&rdp->cblist))
lockdep_assert_held(&rdp->nocb_lock);
}
@@ -1653,7 +1647,11 @@
rcu_nocb_unlock_irqrestore(rdp, flags);
return;
}
- del_timer(&rdp->nocb_timer);
+
+ if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {
+ WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+ del_timer(&rdp->nocb_timer);
+ }
rcu_nocb_unlock_irqrestore(rdp, flags);
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
@@ -1955,12 +1953,14 @@
struct rcu_data *rdp;
struct rcu_node *rnp;
unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
+ bool wasempty = false;
/*
* Each pass through the following loop checks for CBs and for the
* nearest grace period (if any) to wait for next. The CB kthreads
* and the global grace-period kthread are awakened if needed.
*/
+ WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
rcu_nocb_lock_irqsave(rdp, flags);
@@ -1994,10 +1994,13 @@
rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
needwake_gp = rcu_advance_cbs(rnp, rdp);
+ wasempty = rcu_segcblist_restempty(&rdp->cblist,
+ RCU_NEXT_READY_TAIL);
raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
}
// Need to wait on some grace period?
- WARN_ON_ONCE(!rcu_segcblist_restempty(&rdp->cblist,
+ WARN_ON_ONCE(wasempty &&
+ !rcu_segcblist_restempty(&rdp->cblist,
RCU_NEXT_READY_TAIL));
if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
if (!needwait_gp ||
@@ -2037,7 +2040,7 @@
/* Polling, so trace if first poll in the series. */
if (gotcbs)
trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
- schedule_timeout_interruptible(1);
+ schedule_timeout_idle(1);
} else if (!needwait_gp) {
/* Wait for callbacks to appear. */
trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
@@ -2166,7 +2169,6 @@
return;
}
ndw = READ_ONCE(rdp->nocb_defer_wakeup);
- WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
}
@@ -2448,13 +2450,12 @@
return;
waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
- wastimer = timer_pending(&rdp->nocb_timer);
+ wastimer = timer_pending(&rdp->nocb_bypass_timer);
wassleep = swait_active(&rdp->nocb_gp_wq);
- if (!rdp->nocb_defer_wakeup && !rdp->nocb_gp_sleep &&
- !waslocked && !wastimer && !wassleep)
+ if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
return; /* Nothing untowards. */
- pr_info(" !!! %c%c%c%c %c\n",
+ pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
"lL"[waslocked],
"dD"[!!rdp->nocb_defer_wakeup],
"tT"[wastimer],
@@ -2559,7 +2560,7 @@
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_cpu(smp_processor_id()) &&
(!rcu_gp_in_progress() ||
- ULONG_CMP_LT(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
+ time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
return true;
#endif /* #ifdef CONFIG_NO_HZ_FULL */
return false;
@@ -2576,7 +2577,7 @@
}
/* Record the current task on dyntick-idle entry. */
-static void rcu_dynticks_task_enter(void)
+static __always_inline void rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
@@ -2584,9 +2585,27 @@
}
/* Record no current task on dyntick-idle exit. */
-static void rcu_dynticks_task_exit(void)
+static __always_inline void rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}
+
+/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
+static __always_inline void rcu_dynticks_task_trace_enter(void)
+{
+#ifdef CONFIG_TASKS_TRACE_RCU
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+ current->trc_reader_special.b.need_mb = true;
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+}
+
+/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
+static __always_inline void rcu_dynticks_task_trace_exit(void)
+{
+#ifdef CONFIG_TASKS_TRACE_RCU
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+ current->trc_reader_special.b.need_mb = false;
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+}