Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
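The kernel/sched/deadline.c portion of this update reworks how the deadline class tracks priority-inheritance boosting (a pi_of()/is_dl_boosted() pair replaces the dl_boosted flag and the pi_se argument threaded through replenish/update/enqueue), makes DEADLINE bandwidth admission control CPU-capacity aware (dl_bw_capacity() sums capacity_orig_of() over the root domain and __dl_overflow() is handed that capacity instead of a CPU count), and bounds the accepted period via sysctl_sched_dl_period_min/max. A rough standalone model of the capacity-aware admission test follows; the names are illustrative and the real helper lives in kernel/sched/sched.h, outside this file:

#include <stdbool.h>
#include <stdint.h>

#define SCHED_CAPACITY_SHIFT	10	/* 1024 == one "full" CPU, as in the kernel */

/* Reduced model of struct dl_bw: only the two fields the check looks at. */
struct dl_bw_model {
	int64_t  bw;		/* allowed per-unit bandwidth, -1 == unlimited */
	uint64_t total_bw;	/* bandwidth already admitted on this root domain */
};

/*
 * Capacity-aware admission test (model only): admitting new_bw (and
 * retiring old_bw) overflows when the allowed share of the summed CPU
 * capacity is smaller than the resulting total bandwidth.  Before this
 * update the scaling factor was the plain CPU count from dl_bw_cpus().
 */
static bool dl_overflow_model(const struct dl_bw_model *b, unsigned long cap,
			      uint64_t old_bw, uint64_t new_bw)
{
	return b->bw != -1 &&
	       (((uint64_t)b->bw * cap) >> SCHED_CAPACITY_SHIFT) <
			b->total_bw - old_bw + new_bw;
}
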
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2bda9fd..a3ae00c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -43,6 +43,28 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
return !RB_EMPTY_NODE(&dl_se->rb_node);
}
+#ifdef CONFIG_RT_MUTEXES
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+ return dl_se->pi_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+ return pi_of(dl_se) != dl_se;
+}
+#else
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+ return dl_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SMP
static inline struct dl_bw *dl_bw_of(int i)
{
@@ -54,15 +76,49 @@ static inline struct dl_bw *dl_bw_of(int i)
static inline int dl_bw_cpus(int i)
{
struct root_domain *rd = cpu_rq(i)->rd;
- int cpus = 0;
+ int cpus;
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
+
+ if (cpumask_subset(rd->span, cpu_active_mask))
+ return cpumask_weight(rd->span);
+
+ cpus = 0;
+
for_each_cpu_and(i, rd->span, cpu_active_mask)
cpus++;
return cpus;
}
+
+static inline unsigned long __dl_bw_capacity(int i)
+{
+ struct root_domain *rd = cpu_rq(i)->rd;
+ unsigned long cap = 0;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ "sched RCU must be held");
+
+ for_each_cpu_and(i, rd->span, cpu_active_mask)
+ cap += capacity_orig_of(i);
+
+ return cap;
+}
+
+/*
+ * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
+ * of the CPU the task is running on rather than rd's \Sum CPU capacity.
+ */
+static inline unsigned long dl_bw_capacity(int i)
+{
+ if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
+ capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
+ return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
+ } else {
+ return __dl_bw_capacity(i);
+ }
+}
#else
static inline struct dl_bw *dl_bw_of(int i)
{
@@ -73,6 +129,11 @@ static inline int dl_bw_cpus(int i)
{
return 1;
}
+
+static inline unsigned long dl_bw_capacity(int i)
+{
+ return SCHED_CAPACITY_SCALE;
+}
#endif
static inline
@@ -153,7 +214,7 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
__sub_running_bw(dl_se->dl_bw, dl_rq);
}
-void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
{
struct rq *rq;
@@ -334,6 +395,8 @@ static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
return dl_rq->root.rb_leftmost == &dl_se->rb_node;
}
+static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
+
void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
{
raw_spin_lock_init(&dl_b->dl_runtime_lock);
@@ -657,7 +720,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
- WARN_ON(dl_se->dl_boosted);
+ WARN_ON(is_dl_boosted(dl_se));
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
/*
@@ -695,21 +758,20 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
* could happen are, typically, a entity voluntarily trying to overcome its
* runtime, or it just underestimated it during sched_setattr().
*/
-static void replenish_dl_entity(struct sched_dl_entity *dl_se,
- struct sched_dl_entity *pi_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
- BUG_ON(pi_se->dl_runtime <= 0);
+ BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
/*
* This could be the case for a !-dl task that is boosted.
* Just go with full inherited parameters.
*/
if (dl_se->dl_deadline == 0) {
- dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
- dl_se->runtime = pi_se->dl_runtime;
+ dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+ dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
if (dl_se->dl_yielded && dl_se->runtime > 0)
@@ -722,8 +784,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
* arbitrary large.
*/
while (dl_se->runtime <= 0) {
- dl_se->deadline += pi_se->dl_period;
- dl_se->runtime += pi_se->dl_runtime;
+ dl_se->deadline += pi_of(dl_se)->dl_period;
+ dl_se->runtime += pi_of(dl_se)->dl_runtime;
}
/*
@@ -737,8 +799,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
printk_deferred_once("sched: DL replenish lagged too much\n");
- dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
- dl_se->runtime = pi_se->dl_runtime;
+ dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+ dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
if (dl_se->dl_yielded)
@@ -771,8 +833,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
* task with deadline equal to period this is the same of using
* dl_period instead of dl_deadline in the equation above.
*/
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
- struct sched_dl_entity *pi_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
{
u64 left, right;
@@ -794,9 +855,9 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
* of anything below microseconds resolution is actually fiction
* (but still we want to give the user that illusion >;).
*/
- left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+ left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
right = ((dl_se->deadline - t) >> DL_SCALE) *
- (pi_se->dl_runtime >> DL_SCALE);
+ (pi_of(dl_se)->dl_runtime >> DL_SCALE);
return dl_time_before(right, left);
}
@@ -881,24 +942,23 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
* Please refer to the comments update_dl_revised_wakeup() function to find
* more about the Revised CBS rule.
*/
-static void update_dl_entity(struct sched_dl_entity *dl_se,
- struct sched_dl_entity *pi_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
- dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+ dl_entity_overflow(dl_se, rq_clock(rq))) {
if (unlikely(!dl_is_implicit(dl_se) &&
!dl_time_before(dl_se->deadline, rq_clock(rq)) &&
- !dl_se->dl_boosted)){
+ !is_dl_boosted(dl_se))) {
update_dl_revised_wakeup(dl_se, rq);
return;
}
- dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
- dl_se->runtime = pi_se->dl_runtime;
+ dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+ dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
}
@@ -997,7 +1057,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
* The task might have been boosted by someone else and might be in the
* boosting/deboosting path, its not throttled.
*/
- if (dl_se->dl_boosted)
+ if (is_dl_boosted(dl_se))
goto unlock;
/*
@@ -1025,7 +1085,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
* but do not enqueue -- wait for our wakeup to do that.
*/
if (!task_on_rq_queued(p)) {
- replenish_dl_entity(dl_se, dl_se);
+ replenish_dl_entity(dl_se);
goto unlock;
}
@@ -1096,7 +1156,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
* cannot use the runtime, and so it replenishes the task. This rule
* works fine for implicit deadline tasks (deadline == period), and the
* CBS was designed for implicit deadline tasks. However, a task with
- * constrained deadline (deadine < period) might be awakened after the
+ * constrained deadline (deadline < period) might be awakened after the
* deadline, but before the next period. In this case, replenishing the
* task would allow it to run for runtime / deadline. As in this case
* deadline < period, CBS enables a task to run for more than the
@@ -1115,7 +1175,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
- if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+ if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
return;
dl_se->dl_throttled = 1;
if (dl_se->runtime > 0)
@@ -1246,7 +1306,7 @@ static void update_curr_dl(struct rq *rq)
dl_se->dl_overrun = 1;
__dequeue_task_dl(rq, curr, 0);
- if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+ if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
if (!is_leftmost(curr, &rq->dl))
@@ -1440,8 +1500,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
}
static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se,
- struct sched_dl_entity *pi_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
{
BUG_ON(on_dl_rq(dl_se));
@@ -1452,9 +1511,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
*/
if (flags & ENQUEUE_WAKEUP) {
task_contending(dl_se, flags);
- update_dl_entity(dl_se, pi_se);
+ update_dl_entity(dl_se);
} else if (flags & ENQUEUE_REPLENISH) {
- replenish_dl_entity(dl_se, pi_se);
+ replenish_dl_entity(dl_se);
} else if ((flags & ENQUEUE_RESTORE) &&
dl_time_before(dl_se->deadline,
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
@@ -1471,28 +1530,40 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
- struct task_struct *pi_task = rt_mutex_get_top_task(p);
- struct sched_dl_entity *pi_se = &p->dl;
-
- /*
- * Use the scheduling parameters of the top pi-waiter task if:
- * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
- * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
- * smaller than our deadline OR we are a !SCHED_DEADLINE task getting
- * boosted due to a SCHED_DEADLINE pi-waiter).
- * Otherwise we keep our runtime and deadline.
- */
- if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
- pi_se = &pi_task->dl;
+ if (is_dl_boosted(&p->dl)) {
+ /*
+ * Because of delays in the detection of the overrun of a
+ * thread's runtime, it might be the case that a thread
+ * goes to sleep in a rt mutex with negative runtime. As
+ * a consequence, the thread will be throttled.
+ *
+ * While waiting for the mutex, this thread can also be
+ * boosted via PI, resulting in a thread that is throttled
+ * and boosted at the same time.
+ *
+ * In this case, the boost overrides the throttle.
+ */
+ if (p->dl.dl_throttled) {
+ /*
+ * The replenish timer needs to be canceled. No
+ * problem if it fires concurrently: boosted threads
+ * are ignored in dl_task_timer().
+ */
+ hrtimer_try_to_cancel(&p->dl.dl_timer);
+ p->dl.dl_throttled = 0;
+ }
} else if (!dl_prio(p->normal_prio)) {
/*
- * Special case in which we have a !SCHED_DEADLINE task
- * that is going to be deboosted, but exceeds its
- * runtime while doing so. No point in replenishing
- * it, as it's going to return back to its original
- * scheduling class after this.
+ * Special case in which we have a !SCHED_DEADLINE task that is going
+ * to be deboosted, but exceeds its runtime while doing so. No point in
+ * replenishing it, as it's going to return back to its original
+ * scheduling class after this. If it has been throttled, we need to
+ * clear the flag, otherwise the task may wake up as throttled after
+ * being boosted again with no means to replenish the runtime and clear
+ * the throttle.
*/
- BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+ p->dl.dl_throttled = 0;
+ BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
return;
}
@@ -1529,7 +1600,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
return;
}
- enqueue_dl_entity(&p->dl, pi_se, flags);
+ enqueue_dl_entity(&p->dl, flags);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
@@ -1602,6 +1673,7 @@ static int
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
+ bool select_rq;
struct rq *rq;
if (sd_flag != SD_BALANCE_WAKE)
@@ -1621,10 +1693,19 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
* other hand, if it has a shorter deadline, we
* try to make it stay here, it might be important.
*/
- if (unlikely(dl_task(curr)) &&
- (curr->nr_cpus_allowed < 2 ||
- !dl_entity_preempt(&p->dl, &curr->dl)) &&
- (p->nr_cpus_allowed > 1)) {
+ select_rq = unlikely(dl_task(curr)) &&
+ (curr->nr_cpus_allowed < 2 ||
+ !dl_entity_preempt(&p->dl, &curr->dl)) &&
+ p->nr_cpus_allowed > 1;
+
+ /*
+ * Take the capacity of the CPU into account to
+ * ensure it fits the requirement of the task.
+ */
+ if (static_branch_unlikely(&sched_asym_cpucapacity))
+ select_rq |= !dl_task_fits_capacity(p, cpu);
+
+ if (select_rq) {
int target = find_later_rq(p);
if (target != -1 &&
@@ -1774,15 +1855,12 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
return rb_entry(left, struct sched_dl_entity, rb_node);
}
-static struct task_struct *
-pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_dl(struct rq *rq)
{
struct sched_dl_entity *dl_se;
struct dl_rq *dl_rq = &rq->dl;
struct task_struct *p;
- WARN_ON_ONCE(prev || rf);
-
if (!sched_dl_runnable(rq))
return NULL;
@@ -2434,8 +2512,8 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
}
}
-const struct sched_class dl_sched_class = {
- .next = &rt_sched_class,
+const struct sched_class dl_sched_class
+ __section("__dl_sched_class") = {
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
@@ -2503,7 +2581,7 @@ int sched_dl_global_validate(void)
return ret;
}
-void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
+static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
{
if (global_rt_runtime() == RUNTIME_INF) {
dl_rq->bw_ratio = 1 << RATIO_SHIFT;
@@ -2556,11 +2634,12 @@ void sched_dl_do_global(void)
int sched_dl_overflow(struct task_struct *p, int policy,
const struct sched_attr *attr)
{
- struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
u64 period = attr->sched_period ?: attr->sched_deadline;
u64 runtime = attr->sched_runtime;
u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
- int cpus, err = -1;
+ int cpus, err = -1, cpu = task_cpu(p);
+ struct dl_bw *dl_b = dl_bw_of(cpu);
+ unsigned long cap;
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return 0;
@@ -2575,15 +2654,17 @@ int sched_dl_overflow(struct task_struct *p, int policy,
* allocated bandwidth of the container.
*/
raw_spin_lock(&dl_b->lock);
- cpus = dl_bw_cpus(task_cpu(p));
+ cpus = dl_bw_cpus(cpu);
+ cap = dl_bw_capacity(cpu);
+
if (dl_policy(policy) && !task_has_dl_policy(p) &&
- !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+ !__dl_overflow(dl_b, cap, 0, new_bw)) {
if (hrtimer_active(&p->dl.inactive_timer))
__dl_sub(dl_b, p->dl.dl_bw, cpus);
__dl_add(dl_b, new_bw, cpus);
err = 0;
} else if (dl_policy(policy) && task_has_dl_policy(p) &&
- !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+ !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
/*
* XXX this is slightly incorrect: when the task
* utilization decreases, we should delay the total
@@ -2641,6 +2722,14 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
}
/*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting ridiculously long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+
+/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
@@ -2652,6 +2741,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
*/
bool __checkparam_dl(const struct sched_attr *attr)
{
+ u64 period, max, min;
+
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
@@ -2675,12 +2766,21 @@ bool __checkparam_dl(const struct sched_attr *attr)
attr->sched_period & (1ULL << 63))
return false;
+ period = attr->sched_period;
+ if (!period)
+ period = attr->sched_deadline;
+
/* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
+ if (period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;
+ max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+ min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+ if (period < min || period > max)
+ return false;
+
return true;
}
@@ -2698,11 +2798,14 @@ void __dl_clear_params(struct task_struct *p)
dl_se->dl_bw = 0;
dl_se->dl_density = 0;
- dl_se->dl_boosted = 0;
dl_se->dl_throttled = 0;
dl_se->dl_yielded = 0;
dl_se->dl_non_contending = 0;
dl_se->dl_overrun = 0;
+
+#ifdef CONFIG_RT_MUTEXES
+ dl_se->pi_se = dl_se;
+#endif
}
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
@@ -2721,19 +2824,19 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
#ifdef CONFIG_SMP
int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
{
+ unsigned long flags, cap;
unsigned int dest_cpu;
struct dl_bw *dl_b;
bool overflow;
- int cpus, ret;
- unsigned long flags;
+ int ret;
dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
rcu_read_lock_sched();
dl_b = dl_bw_of(dest_cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
- cpus = dl_bw_cpus(dest_cpu);
- overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
+ cap = dl_bw_capacity(dest_cpu);
+ overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
if (overflow) {
ret = -EBUSY;
} else {
@@ -2743,6 +2846,8 @@ int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allo
* We will free resources in the source root_domain
* later on (see set_cpus_allowed_dl()).
*/
+ int cpus = dl_bw_cpus(dest_cpu);
+
__dl_add(dl_b, p->dl.dl_bw, cpus);
ret = 0;
}
@@ -2775,16 +2880,15 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
bool dl_cpu_busy(unsigned int cpu)
{
- unsigned long flags;
+ unsigned long flags, cap;
struct dl_bw *dl_b;
bool overflow;
- int cpus;
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
- cpus = dl_bw_cpus(cpu);
- overflow = __dl_overflow(dl_b, cpus, 0, 0);
+ cap = dl_bw_capacity(cpu);
+ overflow = __dl_overflow(dl_b, cap, 0, 0);
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
rcu_read_unlock_sched();
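
As shown in the __checkparam_dl() hunk above, the effective period (sched_period, or sched_deadline when the period is zero) must now fall within the sysctl_sched_dl_period_min/max range, otherwise sched_setattr() fails with -EINVAL. A minimal userspace sketch of the effect, assuming root privileges (or CAP_SYS_NICE) and a kernel carrying this change; the file name, helper name and chosen numbers are illustrative only:

/* dl_period_check.c — hypothetical demo of the new period bounds:
 * 100 us <= period <= (1 << 22) us (~4.2 s).  The second call below is
 * expected to be rejected with EINVAL on a kernel with this update.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define SCHED_DEADLINE	6	/* from include/uapi/linux/sched.h */

struct sched_attr {		/* layout per sched_setattr(2) */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;		/* ns */
	uint64_t sched_deadline;	/* ns */
	uint64_t sched_period;		/* ns */
};

static int try_period(uint64_t period_ns)
{
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_runtime	= period_ns / 10,
		.sched_deadline	= period_ns,
		.sched_period	= period_ns,
	};
	int ret = syscall(SYS_sched_setattr, 0, &attr, 0);

	printf("period %12llu ns: %s\n", (unsigned long long)period_ns,
	       ret ? strerror(errno) : "accepted");
	return ret;
}

int main(void)
{
	try_period(10ULL * 1000 * 1000);	/* 10 ms: within the new bounds */
	try_period(10ULL * 1000);		/* 10 us: below the 100 us minimum */
	return 0;
}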