Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fe755c1..08db8e0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -67,6 +67,7 @@
#include <linux/tsacct_kern.h>
#include <asm/tlb.h>
+#include <asm-generic/vmlinux.lds.h>
#ifdef CONFIG_PARAVIRT
# include <asm/paravirt.h>
@@ -75,6 +76,8 @@
#include "cpupri.h"
#include "cpudeadline.h"
+#include <trace/events/sched.h>
+
#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
#else
@@ -96,6 +99,7 @@
extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
+extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
@@ -195,6 +199,19 @@
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+static inline void update_avg(u64 *avg, u64 sample)
+{
+ s64 diff = sample - *avg;
+ *avg += diff / 8;
+}
+
+/*
+ * Shifting a value by an exponent greater *or equal* to the size of said value
+ * is UB; cap at size-1.
+ */
+#define shr_bound(val, shift) \
+ (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
+
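The new update_avg() helper is a fixed-point exponentially weighted moving average with a 1/8 weight, and shr_bound() caps the shift count so that "val >> shift" never hits undefined behaviour when the shift reaches the width of the type. The following is a minimal userspace sketch of the same arithmetic, not kernel code: MIN and BITS_PER_TYPE below are plain stand-ins for the kernel's min_t()/BITS_PER_TYPE(), and the sample values are arbitrary.

#include <stdint.h>
#include <stdio.h>

/* userspace stand-ins for the kernel helpers used by shr_bound() */
#define BITS_PER_TYPE(t)	(sizeof(t) * 8)
#define MIN(a, b)		((a) < (b) ? (a) : (b))

/* val >> shift, with shift capped at (width - 1) to avoid UB */
#define shr_bound(val, shift) \
	((val) >> MIN((shift), BITS_PER_TYPE(typeof(val)) - 1))

/* EWMA with weight 1/8: avg += (sample - avg) / 8 */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = sample - *avg;
	*avg += diff / 8;
}

int main(void)
{
	uint64_t avg = 0;

	/* feed a constant sample; avg converges towards it */
	for (int i = 0; i < 32; i++)
		update_avg(&avg, 1000);
	printf("avg after 32 samples of 1000: %llu\n",
	       (unsigned long long)avg);

	/* a shift of 200 would be UB on a 64-bit value; it is capped to 63 */
	uint64_t v = UINT64_MAX;
	printf("shr_bound(UINT64_MAX, 200) = %llu\n",
	       (unsigned long long)shr_bound(v, 200u));
	return 0;
}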
/*
* !! For sched_setattr_nocheck() (kernel) only !!
*
@@ -300,14 +317,28 @@
__dl_update(dl_b, -((s32)tsk_bw / cpus));
}
-static inline
-bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
+static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
+ u64 old_bw, u64 new_bw)
{
return dl_b->bw != -1 &&
- dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
+ cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
}
-extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
+/*
+ * Verify the fitness of task @p to run on @cpu taking into account the
+ * CPU original capacity and the runtime/deadline ratio of the task.
+ *
+ * The function will return true if the CPU original capacity of the
+ * @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
+ * task and false otherwise.
+ */
+static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
+{
+ unsigned long cap = arch_scale_cpu_capacity(cpu);
+
+ return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
+}
+
extern void init_dl_bw(struct dl_bw *dl_b);
extern int sched_dl_global_validate(void);
extern void sched_dl_do_global(void);
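__dl_overflow() now compares deadline bandwidth against the rq capacity via cap_scale() instead of a plain CPU count, and dl_task_fits_capacity() applies the same fixed-point scaling to decide whether a task's runtime/deadline ratio fits a CPU's original capacity. Below is a standalone sketch of that arithmetic in plain userspace C; SCHED_CAPACITY_SHIFT is 10 as in the kernel, while the runtime, deadline, and capacity numbers are made-up example inputs.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* cap_scale(v, s): scale v by s, where s is normalized to SCHED_CAPACITY_SCALE */
#define cap_scale(v, s)		((v) * (s) >> SCHED_CAPACITY_SHIFT)

/*
 * A CPU "fits" a deadline task when
 *	capacity / SCHED_CAPACITY_SCALE >= runtime / deadline
 * rewritten without a division as
 *	cap_scale(deadline, capacity) >= runtime
 */
static bool dl_task_fits(uint64_t runtime, uint64_t deadline, unsigned long cap)
{
	return cap_scale(deadline, cap) >= runtime;
}

int main(void)
{
	/* 2.5 ms runtime every 10 ms deadline => needs 25% of a full CPU */
	uint64_t runtime  = 2500000;	/* ns */
	uint64_t deadline = 10000000;	/* ns */

	/* a little CPU at ~20% capacity and a mid CPU at 50% capacity */
	printf("fits cap=205? %d\n", dl_task_fits(runtime, deadline, 205));
	printf("fits cap=512? %d\n", dl_task_fits(runtime, deadline, 512));
	return 0;
}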
@@ -340,7 +371,6 @@
u8 idle;
u8 period_active;
- u8 distribute_running;
u8 slack_started;
struct hrtimer period_timer;
struct hrtimer slack_timer;
@@ -491,7 +521,6 @@
/* CFS-related fields in a runqueue */
struct cfs_rq {
struct load_weight load;
- unsigned long runnable_weight;
unsigned int nr_running;
unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
@@ -530,7 +559,7 @@
int nr;
unsigned long load_avg;
unsigned long util_avg;
- unsigned long runnable_sum;
+ unsigned long runnable_avg;
} removed;
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -690,8 +719,30 @@
#ifdef CONFIG_FAIR_GROUP_SCHED
/* An entity is a task if it doesn't "own" a runqueue */
#define entity_is_task(se) (!se->my_q)
+
+static inline void se_update_runnable(struct sched_entity *se)
+{
+ if (!entity_is_task(se))
+ se->runnable_weight = se->my_q->h_nr_running;
+}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+ if (entity_is_task(se))
+ return !!se->on_rq;
+ else
+ return se->runnable_weight;
+}
+
#else
#define entity_is_task(se) 1
+
+static inline void se_update_runnable(struct sched_entity *se) {}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+ return !!se->on_rq;
+}
#endif
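With CONFIG_FAIR_GROUP_SCHED, the "runnable" contribution of a group entity is now simply the number of runnable tasks in its group runqueue (h_nr_running), while a task entity contributes 0 or 1 depending on whether it is queued. The toy sketch below models that accounting in userspace C; the struct names and simplified fields are illustrative stand-ins, not the kernel types.

#include <stdio.h>
#include <stddef.h>

/* stripped-down stand-ins for cfs_rq / sched_entity */
struct toy_cfs_rq {
	unsigned int h_nr_running;	/* runnable tasks in this group */
};

struct toy_se {
	int on_rq;			/* task entity: queued or not */
	unsigned long runnable_weight;	/* group entity: cached h_nr_running */
	struct toy_cfs_rq *my_q;	/* NULL for a task, owned rq for a group */
};

static long se_runnable(struct toy_se *se)
{
	if (!se->my_q)			/* entity_is_task() */
		return !!se->on_rq;	/* a task counts as 0 or 1 */
	return se->runnable_weight;	/* a group counts all its runnable tasks */
}

static void se_update_runnable(struct toy_se *se)
{
	if (se->my_q)
		se->runnable_weight = se->my_q->h_nr_running;
}

int main(void)
{
	struct toy_cfs_rq group_rq = { .h_nr_running = 3 };
	struct toy_se task  = { .on_rq = 1, .my_q = NULL };
	struct toy_se group = { .my_q = &group_rq };

	se_update_runnable(&group);
	printf("task runnable  = %ld\n", se_runnable(&task));	/* 1 */
	printf("group runnable = %ld\n", se_runnable(&group));	/* 3 */
	return 0;
}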
#ifdef CONFIG_SMP
@@ -703,10 +754,6 @@
return scale_load_down(se->load.weight);
}
-static inline long se_runnable(struct sched_entity *se)
-{
- return scale_load_down(se->runnable_weight);
-}
static inline bool sched_asym_prefer(int a, int b)
{
@@ -864,15 +911,17 @@
#endif
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
- unsigned long last_load_update_tick;
unsigned long last_blocked_load_update_tick;
unsigned int has_blocked_load;
+ call_single_data_t nohz_csd;
#endif /* CONFIG_SMP */
unsigned int nohz_tick_stopped;
- atomic_t nohz_flags;
+ atomic_t nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
- unsigned long nr_load_updates;
+#ifdef CONFIG_SMP
+ unsigned int ttwu_pending;
+#endif
u64 nr_switches;
#ifdef CONFIG_UCLAMP_TASK
@@ -900,7 +949,7 @@
*/
unsigned long nr_uninterruptible;
- struct task_struct *curr;
+ struct task_struct __rcu *curr;
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
@@ -928,6 +977,7 @@
struct callback_head *balance_callback;
+ unsigned char nohz_idle_balance;
unsigned char idle_balance;
unsigned long misfit_task_load;
@@ -948,12 +998,15 @@
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
struct sched_avg avg_irq;
#endif
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+ struct sched_avg avg_thermal;
+#endif
u64 idle_stamp;
u64 avg_idle;
/* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost;
-#endif
+#endif /* CONFIG_SMP */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
@@ -971,7 +1024,6 @@
#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
- int hrtick_csd_pending;
call_single_data_t hrtick_csd;
#endif
struct hrtimer hrtick_timer;
@@ -996,10 +1048,6 @@
unsigned int ttwu_local;
#endif
-#ifdef CONFIG_SMP
- struct llist_head wake_list;
-#endif
-
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
@@ -1112,6 +1160,24 @@
return rq->clock_task;
}
+/**
+ * By default the decay is the default pelt decay period.
+ * The decay shift can change the decay period in
+ * multiples of 32.
+ *  Decay shift    Decay period(ms)
+ *      0                32
+ *      1                64
+ *      2               128
+ *      3               256
+ *      4               512
+ */
+extern int sched_thermal_decay_shift;
+
+static inline u64 rq_clock_thermal(struct rq *rq)
+{
+ return rq_clock_task(rq) >> sched_thermal_decay_shift;
+}
+
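rq_clock_thermal() stretches the apparent passage of time for the thermal-pressure PELT signal: right-shifting the task clock by sched_thermal_decay_shift makes the fixed 32 ms PELT period behave like 32 << shift milliseconds of wall time, which is exactly the table above. A quick worked check of that relationship, in plain userspace C with an arbitrary clock value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* the PELT period is fixed at 32 ms of *clock* time */
	const unsigned int pelt_period_ms = 32;

	for (int shift = 0; shift <= 4; shift++) {
		/*
		 * Shifting the clock right by 'shift' means the signal sees
		 * time pass 2^shift times slower, so the effective decay
		 * period in wall time is 32 << shift ms.
		 */
		printf("decay shift %d -> effective period %u ms\n",
		       shift, pelt_period_ms << shift);
	}

	/* example: 1 s of task clock as seen by the thermal signal, shift=2 */
	uint64_t clock_ns = 1000000000ULL;
	printf("1 s of rq_clock_task() looks like %llu ns with shift 2\n",
	       (unsigned long long)(clock_ns >> 2));
	return 0;
}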
static inline void rq_clock_skip_update(struct rq *rq)
{
lockdep_assert_held(&rq->lock);
@@ -1141,6 +1207,16 @@
#endif
};
+/*
+ * Lockdep annotation that avoids accidental unlocks; it's like a
+ * sticky/continuous lockdep_assert_held().
+ *
+ * This avoids code that has access to 'struct rq *rq' (basically everything in
+ * the scheduler) from accidentally unlocking the rq if they do not also have a
+ * copy of the (on-stack) 'struct rq_flags rf'.
+ *
+ * Also see Documentation/locking/lockdep-design.rst.
+ */
static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
{
rf->cookie = lockdep_pin_lock(&rq->lock);
@@ -1325,8 +1401,6 @@
rq->balance_callback = head;
}
-extern void sched_ttwu_pending(void);
-
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
lockdep_is_held(&sched_domains_mutex))
@@ -1342,8 +1416,6 @@
for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
__sd; __sd = __sd->parent)
-#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
-
/**
* highest_flag_domain - Return highest sched_domain containing flag.
* @cpu: The CPU whose highest level of sched domain is to
@@ -1403,7 +1475,7 @@
int id;
#endif
- unsigned long cpumask[0]; /* Balance mask */
+ unsigned long cpumask[]; /* Balance mask */
};
struct sched_group {
@@ -1421,7 +1493,7 @@
* by attaching extra space to the end of the structure,
* depending on how many CPUs the kernel has booted up with)
*/
- unsigned long cpumask[0];
+ unsigned long cpumask[];
};
static inline struct cpumask *sched_group_span(struct sched_group *sg)
@@ -1464,15 +1536,11 @@
}
#endif
-extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+extern void flush_smp_call_function_from_idle(void);
-#else
-
-static inline void sched_ttwu_pending(void) { }
-
-static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
-
-#endif /* CONFIG_SMP */
+#else /* !CONFIG_SMP: */
+static inline void flush_smp_call_function_from_idle(void) { }
+#endif
#include "stats.h"
#include "autogroup.h"
@@ -1655,7 +1723,8 @@
*/
#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* Child wakeup after fork */
-#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
+#define WF_MIGRATED 0x04 /* Internal use, task got migrated */
+#define WF_ON_CPU 0x08 /* Wakee is on_cpu */
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -1712,7 +1781,6 @@
#define RETRY_TASK ((void *)-1UL)
struct sched_class {
- const struct sched_class *next;
#ifdef CONFIG_UCLAMP_TASK
int uclamp_enabled;
@@ -1721,24 +1789,12 @@
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
- bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
+ bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
- /*
- * Both @prev and @rf are optional and may be NULL, in which case the
- * caller must already have invoked put_prev_task(rq, prev, rf).
- *
- * Otherwise it is the responsibility of the pick_next_task() to call
- * put_prev_task() on the @prev task or something equivalent, IFF it
- * returns a next task.
- *
- * In that case (@rf != NULL) it may return RETRY_TASK when it finds a
- * higher prio class has runnable tasks.
- */
- struct task_struct * (*pick_next_task)(struct rq *rq,
- struct task_struct *prev,
- struct rq_flags *rf);
+ struct task_struct *(*pick_next_task)(struct rq *rq);
+
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
@@ -1781,7 +1837,7 @@
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
#endif
-};
+} __aligned(STRUCT_ALIGNMENT); /* STRUCT_ALIGN(), vmlinux.lds.h */
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
@@ -1795,17 +1851,18 @@
next->sched_class->set_next_task(rq, next, false);
}
-#ifdef CONFIG_SMP
-#define sched_class_highest (&stop_sched_class)
-#else
-#define sched_class_highest (&dl_sched_class)
-#endif
+/* Defined in include/asm-generic/vmlinux.lds.h */
+extern struct sched_class __begin_sched_classes[];
+extern struct sched_class __end_sched_classes[];
+
+#define sched_class_highest (__end_sched_classes - 1)
+#define sched_class_lowest (__begin_sched_classes - 1)
#define for_class_range(class, _from, _to) \
- for (class = (_from); class != (_to); class = class->next)
+ for (class = (_from); class != (_to); class--)
#define for_each_class(class) \
- for_class_range(class, sched_class_highest, NULL)
+ for_class_range(class, sched_class_highest, sched_class_lowest)
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
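The sched_class singly linked list (the removed ->next pointer) is replaced by an array laid out by the linker: the class objects are placed in priority order between __begin_sched_classes and __end_sched_classes (hence the __aligned(STRUCT_ALIGNMENT) above), so for_each_class() just walks pointers downward. The userspace analogue below uses an ordinary array instead of linker-provided section bounds; the class names and macro names are only illustrative.

#include <stdio.h>

struct toy_sched_class {
	const char *name;
};

/*
 * Stand-in for the linker-section layout: lowest priority first,
 * highest priority last, mirroring the kernel's section ordering.
 */
static const struct toy_sched_class sched_classes[] = {
	{ "idle" },
	{ "fair" },
	{ "rt"   },
	{ "dl"   },
	{ "stop" },
};

#define begin_classes	(sched_classes)
#define end_classes	(sched_classes + sizeof(sched_classes) / sizeof(sched_classes[0]))

/*
 * Same shape as the kernel macros: "highest" is the last element,
 * "lowest" is a one-before-the-first sentinel used to stop the loop.
 */
#define class_highest	(end_classes - 1)
#define class_lowest	(begin_classes - 1)

#define for_class_range(class, _from, _to) \
	for (class = (_from); class != (_to); class--)

#define for_each_class(class) \
	for_class_range(class, class_highest, class_lowest)

int main(void)
{
	const struct toy_sched_class *class;

	/* visits stop, dl, rt, fair, idle - highest priority first */
	for_each_class(class)
		printf("%s\n", class->name);
	return 0;
}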
@@ -1833,6 +1890,9 @@
return rq->cfs.nr_running > 0;
}
+extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+extern struct task_struct *pick_next_task_idle(struct rq *rq);
+
#ifdef CONFIG_SMP
extern void update_group_capacity(struct sched_domain *sd, int cpu);
@@ -1890,7 +1950,6 @@
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
-extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)
@@ -1913,12 +1972,7 @@
*/
static inline void sched_update_tick_dependency(struct rq *rq)
{
- int cpu;
-
- if (!tick_nohz_full_enabled())
- return;
-
- cpu = cpu_of(rq);
+ int cpu = cpu_of(rq);
if (!tick_nohz_full_cpu(cpu))
return;
@@ -1938,6 +1992,9 @@
unsigned prev_nr = rq->nr_running;
rq->nr_running = prev_nr + count;
+ if (trace_sched_update_nr_running_tp_enabled()) {
+ call_trace_sched_update_nr_running(rq, count);
+ }
#ifdef CONFIG_SMP
if (prev_nr < 2 && rq->nr_running >= 2) {
@@ -1952,6 +2009,10 @@
static inline void sub_nr_running(struct rq *rq, unsigned count)
{
rq->nr_running -= count;
+ if (trace_sched_update_nr_running_tp_enabled()) {
+ call_trace_sched_update_nr_running(rq, -count);
+ }
+
/* Check if we still need preemption */
sched_update_tick_dependency(rq);
}
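add_nr_running()/sub_nr_running() now fire the sched_update_nr_running tracepoint, but only after trace_sched_update_nr_running_tp_enabled(), a static-key check that is effectively free when no probe is attached; the actual emit lives in the out-of-line call_trace_sched_update_nr_running() helper declared near the top of the file. The sketch below shows that "cheap guard, out-of-line slow path" shape in plain C; the static key is reduced to an ordinary flag, which loses the code-patching trick but keeps the structure.

#include <stdbool.h>
#include <stdio.h>

/* stand-in for the tracepoint static key */
static bool trace_nr_running_enabled;

/* out-of-line slow path: only runs when someone is listening */
static void call_trace_update_nr_running(unsigned int nr, int count)
{
	fprintf(stderr, "nr_running: %u (%+d)\n", nr, count);
}

static unsigned int nr_running;

static void add_nr_running(unsigned int count)
{
	nr_running += count;
	if (trace_nr_running_enabled)		/* ~free when disabled */
		call_trace_update_nr_running(nr_running, (int)count);
}

static void sub_nr_running(unsigned int count)
{
	nr_running -= count;
	if (trace_nr_running_enabled)
		call_trace_update_nr_running(nr_running, -(int)count);
}

int main(void)
{
	add_nr_running(2);			/* silent: tracing disabled */
	trace_nr_running_enabled = true;
	add_nr_running(1);			/* logged */
	sub_nr_running(3);			/* logged */
	return 0;
}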
@@ -1991,7 +2052,24 @@
#endif /* CONFIG_SCHED_HRTICK */
+#ifndef arch_scale_freq_tick
+static __always_inline
+void arch_scale_freq_tick(void)
+{
+}
+#endif
+
#ifndef arch_scale_freq_capacity
+/**
+ * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
+ * @cpu: the CPU in question.
+ *
+ * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
+ *
+ * f_curr
+ * ------ * SCHED_CAPACITY_SCALE
+ * f_max
+ */
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)
{
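The new kernel-doc spells out what the frequency scale factor means: current frequency over maximum frequency, expressed in SCHED_CAPACITY_SCALE units, so 1024 means "running at f_max". A quick worked computation of that formula in userspace C; the frequencies below are made-up example values.

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* f_curr / f_max, normalized to SCHED_CAPACITY_SCALE */
static unsigned long freq_scale(unsigned long f_curr_khz, unsigned long f_max_khz)
{
	return (f_curr_khz << SCHED_CAPACITY_SHIFT) / f_max_khz;
}

int main(void)
{
	/* e.g. a core currently at 1.4 GHz out of a 2.8 GHz maximum */
	printf("scale at 1.4/2.8 GHz: %lu\n", freq_scale(1400000, 2800000)); /* 512 */
	printf("scale at f_max:       %lu\n", freq_scale(2800000, 2800000)); /* 1024 */
	return 0;
}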
@@ -2323,10 +2401,10 @@
#endif /* CONFIG_CPU_FREQ */
#ifdef CONFIG_UCLAMP_TASK
-unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
/**
- * uclamp_util_with - clamp @util with @rq and @p effective uclamp values.
+ * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
* @rq: The rq to clamp against. Must not be NULL.
* @util: The util value to clamp.
* @p: The task to clamp against. Can be NULL if you want to clamp
@@ -2343,23 +2421,30 @@
* static key is disabled.
*/
static __always_inline
-unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
- struct task_struct *p)
+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+ struct task_struct *p)
{
- unsigned int min_util;
- unsigned int max_util;
+ unsigned long min_util = 0;
+ unsigned long max_util = 0;
if (!static_branch_likely(&sched_uclamp_used))
return util;
- min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
- max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
-
if (p) {
- min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
- max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
+ min_util = uclamp_eff_value(p, UCLAMP_MIN);
+ max_util = uclamp_eff_value(p, UCLAMP_MAX);
+
+ /*
+ * Ignore last runnable task's max clamp, as this task will
+ * reset it. Similarly, no need to read the rq's min clamp.
+ */
+ if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
+ goto out;
}
+ min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
+ max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
+out:
/*
* Since CPU's {min,max}_util clamps are MAX aggregated considering
* RUNNABLE tasks with _different_ clamps, we can end up with an
@@ -2371,11 +2456,6 @@
return clamp(util, min_util, max_util);
}
-static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
-{
- return uclamp_util_with(rq, util, NULL);
-}
-
/*
* When uclamp is compiled in, the aggregation at rq level is 'turned off'
* by default in the fast path and only gets turned on once userspace performs
@@ -2389,12 +2469,9 @@
return static_branch_likely(&sched_uclamp_used);
}
#else /* CONFIG_UCLAMP_TASK */
-static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
- struct task_struct *p)
-{
- return util;
-}
-static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+static inline
+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+ struct task_struct *p)
{
return util;
}
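uclamp_rq_util_with() now starts from the task's own effective clamps, skips the rq aggregates entirely when UCLAMP_FLAG_IDLE is set (the last runnable task's clamps are stale by definition), and otherwise max-aggregates with the rq values before clamping the utilization. The reduced userspace model below follows that decision flow; the struct, flag value, and helper names are simplified stand-ins rather than the kernel types.

#include <stdio.h>

#define UCLAMP_FLAG_IDLE	0x01

/* simplified stand-in for the per-rq clamp state */
struct toy_rq {
	unsigned long uclamp_min;
	unsigned long uclamp_max;
	unsigned int  uclamp_flags;
};

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/*
 * Mirror of the uclamp_rq_util_with() flow: task clamps first, rq clamps
 * only when the rq is not flagged idle, then clamp the utilization.
 */
static unsigned long rq_util_with(struct toy_rq *rq, unsigned long util,
				  unsigned long p_min, unsigned long p_max,
				  int have_task)
{
	unsigned long min_util = 0, max_util = 0;

	if (have_task) {
		min_util = p_min;
		max_util = p_max;
		if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
			goto out;
	}
	if (rq->uclamp_min > min_util)
		min_util = rq->uclamp_min;
	if (rq->uclamp_max > max_util)
		max_util = rq->uclamp_max;
out:
	return clamp_ul(util, min_util, max_util);
}

int main(void)
{
	struct toy_rq rq = { .uclamp_min = 0, .uclamp_max = 384, .uclamp_flags = 0 };

	/* util 700 clamped by max(task max 512, rq max 384) = 512 */
	printf("clamped: %lu\n", rq_util_with(&rq, 700, 0, 512, 1));	/* 512 */

	/* idle rq: only the task clamps apply, so util is boosted to 256 */
	rq.uclamp_flags = UCLAMP_FLAG_IDLE;
	printf("idle rq: %lu\n", rq_util_with(&rq, 100, 256, 1024, 1));	/* 256 */
	return 0;
}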
@@ -2551,3 +2628,19 @@
{
}
#endif
+
+#ifdef CONFIG_SMP
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+ if (!(p->flags & PF_KTHREAD))
+ return false;
+
+ if (p->nr_cpus_allowed != 1)
+ return false;
+
+ return true;
+}
+#endif
+
+void swake_up_all_locked(struct swait_queue_head *q);
+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);