Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b631722..c8870c5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -45,6 +45,7 @@
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/delayacct.h>
+#include <linux/energy_model.h>
#include <linux/init_task.h>
#include <linux/kprobes.h>
#include <linux/kthread.h>
@@ -55,9 +56,9 @@
#include <linux/proc_fs.h>
#include <linux/prefetch.h>
#include <linux/profile.h>
+#include <linux/psi.h>
#include <linux/rcupdate_wait.h>
#include <linux/security.h>
-#include <linux/stackprotector.h>
#include <linux/stop_machine.h>
#include <linux/suspend.h>
#include <linux/swait.h>
@@ -95,12 +96,6 @@
extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
-#ifdef CONFIG_SMP
-extern void cpu_load_update_active(struct rq *this_rq);
-#else
-static inline void cpu_load_update_active(struct rq *this_rq) { }
-#endif
-
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
@@ -177,6 +172,11 @@
rt_policy(policy) || dl_policy(policy);
}
+static inline int task_has_idle_policy(struct task_struct *p)
+{
+ return idle_policy(p->policy);
+}
+
static inline int task_has_rt_policy(struct task_struct *p)
{
return rt_policy(p->policy);
@@ -321,6 +321,7 @@
#ifdef CONFIG_CGROUP_SCHED
#include <linux/cgroup.h>
+#include <linux/psi.h>
struct cfs_rq;
struct rt_rq;
@@ -334,11 +335,11 @@
u64 quota;
u64 runtime;
s64 hierarchical_quota;
- u64 runtime_expires;
- int expires_seq;
- short idle;
- short period_active;
+ u8 idle;
+ u8 period_active;
+ u8 distribute_running;
+ u8 slack_started;
struct hrtimer period_timer;
struct hrtimer slack_timer;
struct list_head throttled_cfs_rq;
@@ -347,8 +348,6 @@
int nr_periods;
int nr_throttled;
u64 throttled_time;
-
- bool distribute_running;
#endif
};
@@ -392,6 +391,16 @@
#endif
struct cfs_bandwidth cfs_bandwidth;
+
+#ifdef CONFIG_UCLAMP_TASK_GROUP
+ /* The two-decimal-precision [%] value requested from user-space */
+ unsigned int uclamp_pct[UCLAMP_CNT];
+ /* Clamp values requested for a task group */
+ struct uclamp_se uclamp_req[UCLAMP_CNT];
+ /* Effective clamp values used for a task group */
+ struct uclamp_se uclamp[UCLAMP_CNT];
+#endif
+
};
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -482,7 +491,8 @@
struct load_weight load;
unsigned long runnable_weight;
unsigned int nr_running;
- unsigned int h_nr_running;
+ unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
+ unsigned int idle_h_nr_running; /* SCHED_IDLE */
u64 exec_clock;
u64 min_vruntime;
@@ -555,8 +565,6 @@
#ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled;
- int expires_seq;
- u64 runtime_expires;
s64 runtime_remaining;
u64 throttled_clock;
@@ -631,7 +639,7 @@
/*
* Deadline values of the currently executing and the
* earliest ready task on this rq. Caching these facilitates
- * the decision wether or not a ready but not running task
+ * the decision whether or not a ready but not running task
* should migrate somewhere else.
*/
struct {
@@ -703,6 +711,16 @@
return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}
+struct perf_domain {
+ struct em_perf_domain *em_pd;
+ struct perf_domain *next;
+ struct rcu_head rcu;
+};
+
+/* Scheduling group status flags */
+#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
+#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
+
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
@@ -718,8 +736,15 @@
cpumask_var_t span;
cpumask_var_t online;
- /* Indicate more than one runnable task for any CPU */
- bool overload;
+ /*
+ * Indicate pullable load on at least one CPU, e.g.:
+ * - More than one runnable task
+ * - Running task is misfit
+ */
+ int overload;
+
+ /* Indicate one or more CPUs over-utilized (tipping point) */
+ int overutilized;
/*
* The bit corresponding to a CPU gets set here if such CPU has more
@@ -751,10 +776,13 @@
struct cpupri cpupri;
unsigned long max_cpu_capacity;
-};
-extern struct root_domain def_root_domain;
-extern struct mutex sched_domains_mutex;
+ /*
+ * NULL-terminated list of performance domains intersecting with the
+ * CPUs of the rd. Protected by RCU.
+ */
+ struct perf_domain __rcu *pd;
+};
extern void init_defrootdomain(void);
extern int sched_init_domains(const struct cpumask *cpu_map);
@@ -767,6 +795,48 @@
#endif
#endif /* CONFIG_SMP */
+#ifdef CONFIG_UCLAMP_TASK
+/*
+ * struct uclamp_bucket - Utilization clamp bucket
+ * @value: utilization clamp value for tasks on this clamp bucket
+ * @tasks: number of RUNNABLE tasks on this clamp bucket
+ *
+ * Keep track of how many tasks are RUNNABLE for a given utilization
+ * clamp value.
+ */
+struct uclamp_bucket {
+ unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
+ unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
+};
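
For illustration only (not part of the patch): a standalone userspace sketch of the bit split this bitfield implies. BITS_PER_LONG and the bit-counting helper are reimplemented locally as assumptions; with SCHED_CAPACITY_SCALE == 1024, eleven bits are needed for the value field, leaving the rest of an unsigned long (53 bits on a 64-bit build) for the RUNNABLE-task count.

/* Userspace sketch: how many bits each uclamp_bucket field gets. */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024UL
#define BITS_PER_LONG		(8 * sizeof(unsigned long))

/* Smallest number of bits able to represent v (v > 0), like bits_per(). */
static unsigned int bits_needed(unsigned long v)
{
	unsigned int bits = 0;

	while (v) {
		bits++;
		v >>= 1;
	}
	return bits;
}

int main(void)
{
	unsigned int value_bits = bits_needed(SCHED_CAPACITY_SCALE);

	printf("value: %u bits, tasks: %zu bits\n",
	       value_bits, BITS_PER_LONG - value_bits);
	return 0;
}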
+
+/*
+ * struct uclamp_rq - rq's utilization clamp
+ * @value: currently active clamp values for a rq
+ * @bucket: utilization clamp buckets affecting a rq
+ *
+ * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
+ * A clamp value is affecting a rq when there is at least one task RUNNABLE
+ * (or actually running) with that value.
+ *
+ * There are up to UCLAMP_CNT possible different clamp values; currently there
+ * are only two: minimum utilization and maximum utilization.
+ *
+ * All utilization clamping values are MAX aggregated, since:
+ * - for util_min: we want to run the CPU at least at the max of the minimum
+ * utilization required by its currently RUNNABLE tasks.
+ * - for util_max: we want to allow the CPU to run up to the max of the
+ * maximum utilization allowed by its currently RUNNABLE tasks.
+ *
+ * Since on each system we expect only a limited number of different
+ * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
+ * the metrics required to compute all the per-rq utilization clamp values.
+ */
+struct uclamp_rq {
+ unsigned int value;
+ struct uclamp_bucket bucket[UCLAMP_BUCKETS];
+};
+#endif /* CONFIG_UCLAMP_TASK */
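
For illustration only: a hedged userspace sketch of the MAX aggregation rule described above. The task list and clamp numbers are invented, and the real kernel tracks this incrementally per bucket rather than rescanning tasks.

/* Userspace sketch: MAX aggregation of per-task clamps into rq clamps. */
#include <stdio.h>

struct task_clamp { unsigned int util_min, util_max; };

int main(void)
{
	/* Hypothetical RUNNABLE tasks on one rq. */
	struct task_clamp tasks[] = {
		{ .util_min = 128, .util_max = 512  },
		{ .util_min = 300, .util_max = 1024 },
		{ .util_min = 0,   .util_max = 200  },
	};
	unsigned int rq_min = 0, rq_max = 0;
	unsigned int i;

	for (i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++) {
		if (tasks[i].util_min > rq_min)
			rq_min = tasks[i].util_min;
		if (tasks[i].util_max > rq_max)
			rq_max = tasks[i].util_max;
	}
	/* Both clamps are MAX aggregated: util_min = 300, util_max = 1024. */
	printf("rq util_min=%u util_max=%u\n", rq_min, rq_max);
	return 0;
}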
+
/*
* This is the main, per-CPU runqueue data structure.
*
@@ -788,8 +858,6 @@
unsigned int nr_preferred_running;
unsigned int numa_migrate_on;
#endif
- #define CPU_LOAD_IDX_MAX 5
- unsigned long cpu_load[CPU_LOAD_IDX_MAX];
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
unsigned long last_load_update_tick;
@@ -800,11 +868,16 @@
atomic_t nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
- /* capture load from *all* tasks on this CPU: */
- struct load_weight load;
unsigned long nr_load_updates;
u64 nr_switches;
+#ifdef CONFIG_UCLAMP_TASK
+ /* Utilization clamp values based on CPU's RUNNABLE tasks */
+ struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
+ unsigned int uclamp_flags;
+#define UCLAMP_FLAG_IDLE 0x01
+#endif
+
struct cfs_rq cfs;
struct rt_rq rt;
struct dl_rq dl;
@@ -831,13 +904,20 @@
unsigned int clock_update_flags;
u64 clock;
- u64 clock_task;
+ /* Ensure that all clocks are in the same cache line */
+ u64 clock_task ____cacheline_aligned;
+ u64 clock_pelt;
+ unsigned long lost_idle_time;
atomic_t nr_iowait;
+#ifdef CONFIG_MEMBARRIER
+ int membarrier_state;
+#endif
+
#ifdef CONFIG_SMP
- struct root_domain *rd;
- struct sched_domain *sd;
+ struct root_domain *rd;
+ struct sched_domain __rcu *sd;
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
@@ -846,6 +926,8 @@
unsigned char idle_balance;
+ unsigned long misfit_task_load;
+
/* For active balancing */
int active_balance;
int push_cpu;
@@ -919,6 +1001,22 @@
#endif
};
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+/* CPU runqueue to which this cfs_rq is attached */
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->rq;
+}
+
+#else
+
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+ return container_of(cfs_rq, struct rq, cfs);
+}
+#endif
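
For illustration only: a minimal userspace model of the container_of() pattern used by the !CONFIG_FAIR_GROUP_SCHED rq_of() above. The struct layouts are simplified stand-ins and the macro omits the type checking of the kernel version.

/* Userspace sketch: recover the enclosing rq from an embedded cfs_rq. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct cfs_rq { unsigned int nr_running; };
struct rq { int cpu; struct cfs_rq cfs; };

static struct rq *rq_of(struct cfs_rq *cfs_rq)
{
	return container_of(cfs_rq, struct rq, cfs);
}

int main(void)
{
	struct rq rq = { .cpu = 3 };

	printf("cpu=%d\n", rq_of(&rq.cfs)->cpu);	/* prints 3 */
	return 0;
}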
+
static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
@@ -950,6 +1048,8 @@
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
+extern void update_rq_clock(struct rq *rq);
+
static inline u64 __rq_clock_broken(struct rq *rq)
{
return READ_ONCE(rq->clock);
@@ -1068,712 +1168,6 @@
#endif
}
-#ifdef CONFIG_NUMA
-enum numa_topology_type {
- NUMA_DIRECT,
- NUMA_GLUELESS_MESH,
- NUMA_BACKPLANE,
-};
-extern enum numa_topology_type sched_numa_topology_type;
-extern int sched_max_numa_distance;
-extern bool find_numa_distance(int distance);
-#endif
-
-#ifdef CONFIG_NUMA
-extern void sched_init_numa(void);
-extern void sched_domains_numa_masks_set(unsigned int cpu);
-extern void sched_domains_numa_masks_clear(unsigned int cpu);
-#else
-static inline void sched_init_numa(void) { }
-static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
-static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
-#endif
-
-#ifdef CONFIG_NUMA_BALANCING
-/* The regions in numa_faults array from task_struct */
-enum numa_faults_stats {
- NUMA_MEM = 0,
- NUMA_CPU,
- NUMA_MEMBUF,
- NUMA_CPUBUF
-};
-extern void sched_setnuma(struct task_struct *p, int node);
-extern int migrate_task_to(struct task_struct *p, int cpu);
-extern int migrate_swap(struct task_struct *p, struct task_struct *t,
- int cpu, int scpu);
-extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
-#else
-static inline void
-init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
-{
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
-#ifdef CONFIG_SMP
-
-static inline void
-queue_balance_callback(struct rq *rq,
- struct callback_head *head,
- void (*func)(struct rq *rq))
-{
- lockdep_assert_held(&rq->lock);
-
- if (unlikely(head->next))
- return;
-
- head->func = (void (*)(struct callback_head *))func;
- head->next = rq->balance_callback;
- rq->balance_callback = head;
-}
-
-extern void sched_ttwu_pending(void);
-
-#define rcu_dereference_check_sched_domain(p) \
- rcu_dereference_check((p), \
- lockdep_is_held(&sched_domains_mutex))
-
-/*
- * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
- * See detach_destroy_domains: synchronize_sched for details.
- *
- * The domain tree of any CPU may only be accessed from within
- * preempt-disabled sections.
- */
-#define for_each_domain(cpu, __sd) \
- for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
- __sd; __sd = __sd->parent)
-
-#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
-
-/**
- * highest_flag_domain - Return highest sched_domain containing flag.
- * @cpu: The CPU whose highest level of sched domain is to
- * be returned.
- * @flag: The flag to check for the highest sched_domain
- * for the given CPU.
- *
- * Returns the highest sched_domain of a CPU which contains the given flag.
- */
-static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
-{
- struct sched_domain *sd, *hsd = NULL;
-
- for_each_domain(cpu, sd) {
- if (!(sd->flags & flag))
- break;
- hsd = sd;
- }
-
- return hsd;
-}
-
-static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
-{
- struct sched_domain *sd;
-
- for_each_domain(cpu, sd) {
- if (sd->flags & flag)
- break;
- }
-
- return sd;
-}
-
-DECLARE_PER_CPU(struct sched_domain *, sd_llc);
-DECLARE_PER_CPU(int, sd_llc_size);
-DECLARE_PER_CPU(int, sd_llc_id);
-DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
-DECLARE_PER_CPU(struct sched_domain *, sd_numa);
-DECLARE_PER_CPU(struct sched_domain *, sd_asym);
-
-struct sched_group_capacity {
- atomic_t ref;
- /*
- * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
- * for a single CPU.
- */
- unsigned long capacity;
- unsigned long min_capacity; /* Min per-CPU capacity in group */
- unsigned long next_update;
- int imbalance; /* XXX unrelated to capacity but shared group state */
-
-#ifdef CONFIG_SCHED_DEBUG
- int id;
-#endif
-
- unsigned long cpumask[0]; /* Balance mask */
-};
-
-struct sched_group {
- struct sched_group *next; /* Must be a circular list */
- atomic_t ref;
-
- unsigned int group_weight;
- struct sched_group_capacity *sgc;
- int asym_prefer_cpu; /* CPU of highest priority in group */
-
- /*
- * The CPUs this group covers.
- *
- * NOTE: this field is variable length. (Allocated dynamically
- * by attaching extra space to the end of the structure,
- * depending on how many CPUs the kernel has booted up with)
- */
- unsigned long cpumask[0];
-};
-
-static inline struct cpumask *sched_group_span(struct sched_group *sg)
-{
- return to_cpumask(sg->cpumask);
-}
-
-/*
- * See build_balance_mask().
- */
-static inline struct cpumask *group_balance_mask(struct sched_group *sg)
-{
- return to_cpumask(sg->sgc->cpumask);
-}
-
-/**
- * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
- * @group: The group whose first CPU is to be returned.
- */
-static inline unsigned int group_first_cpu(struct sched_group *group)
-{
- return cpumask_first(sched_group_span(group));
-}
-
-extern int group_balance_cpu(struct sched_group *sg);
-
-#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
-void register_sched_domain_sysctl(void);
-void dirty_sched_domain_sysctl(int cpu);
-void unregister_sched_domain_sysctl(void);
-#else
-static inline void register_sched_domain_sysctl(void)
-{
-}
-static inline void dirty_sched_domain_sysctl(int cpu)
-{
-}
-static inline void unregister_sched_domain_sysctl(void)
-{
-}
-#endif
-
-#else
-
-static inline void sched_ttwu_pending(void) { }
-
-#endif /* CONFIG_SMP */
-
-#include "stats.h"
-#include "autogroup.h"
-
-#ifdef CONFIG_CGROUP_SCHED
-
-/*
- * Return the group to which this tasks belongs.
- *
- * We cannot use task_css() and friends because the cgroup subsystem
- * changes that value before the cgroup_subsys::attach() method is called,
- * therefore we cannot pin it and might observe the wrong value.
- *
- * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
- * core changes this before calling sched_move_task().
- *
- * Instead we use a 'copy' which is updated from sched_move_task() while
- * holding both task_struct::pi_lock and rq::lock.
- */
-static inline struct task_group *task_group(struct task_struct *p)
-{
- return p->sched_task_group;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
- struct task_group *tg = task_group(p);
-#endif
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
- set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
- p->se.cfs_rq = tg->cfs_rq[cpu];
- p->se.parent = tg->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
- p->rt.rt_rq = tg->rt_rq[cpu];
- p->rt.parent = tg->rt_se[cpu];
-#endif
-}
-
-#else /* CONFIG_CGROUP_SCHED */
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
- return NULL;
-}
-
-#endif /* CONFIG_CGROUP_SCHED */
-
-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
- set_task_rq(p, cpu);
-#ifdef CONFIG_SMP
- /*
- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
- * successfuly executed on another CPU. We must ensure that updates of
- * per-task data have been completed by this moment.
- */
- smp_wmb();
-#ifdef CONFIG_THREAD_INFO_IN_TASK
- p->cpu = cpu;
-#else
- task_thread_info(p)->cpu = cpu;
-#endif
- p->wake_cpu = cpu;
-#endif
-}
-
-/*
- * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
- */
-#ifdef CONFIG_SCHED_DEBUG
-# include <linux/static_key.h>
-# define const_debug __read_mostly
-#else
-# define const_debug const
-#endif
-
-#define SCHED_FEAT(name, enabled) \
- __SCHED_FEAT_##name ,
-
-enum {
-#include "features.h"
- __SCHED_FEAT_NR,
-};
-
-#undef SCHED_FEAT
-
-#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
-
-/*
- * To support run-time toggling of sched features, all the translation units
- * (but core.c) reference the sysctl_sched_features defined in core.c.
- */
-extern const_debug unsigned int sysctl_sched_features;
-
-#define SCHED_FEAT(name, enabled) \
-static __always_inline bool static_branch_##name(struct static_key *key) \
-{ \
- return static_key_##enabled(key); \
-}
-
-#include "features.h"
-#undef SCHED_FEAT
-
-extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
-#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
-
-#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
-
-/*
- * Each translation unit has its own copy of sysctl_sched_features to allow
- * constants propagation at compile time and compiler optimization based on
- * features default.
- */
-#define SCHED_FEAT(name, enabled) \
- (1UL << __SCHED_FEAT_##name) * enabled |
-static const_debug __maybe_unused unsigned int sysctl_sched_features =
-#include "features.h"
- 0;
-#undef SCHED_FEAT
-
-#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
-
-#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
-
-extern struct static_key_false sched_numa_balancing;
-extern struct static_key_false sched_schedstats;
-
-static inline u64 global_rt_period(void)
-{
- return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
-}
-
-static inline u64 global_rt_runtime(void)
-{
- if (sysctl_sched_rt_runtime < 0)
- return RUNTIME_INF;
-
- return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
-}
-
-static inline int task_current(struct rq *rq, struct task_struct *p)
-{
- return rq->curr == p;
-}
-
-static inline int task_running(struct rq *rq, struct task_struct *p)
-{
-#ifdef CONFIG_SMP
- return p->on_cpu;
-#else
- return task_current(rq, p);
-#endif
-}
-
-static inline int task_on_rq_queued(struct task_struct *p)
-{
- return p->on_rq == TASK_ON_RQ_QUEUED;
-}
-
-static inline int task_on_rq_migrating(struct task_struct *p)
-{
- return p->on_rq == TASK_ON_RQ_MIGRATING;
-}
-
-/*
- * wake flags
- */
-#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
-#define WF_FORK 0x02 /* Child wakeup after fork */
-#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
-
-/*
- * To aid in avoiding the subversion of "niceness" due to uneven distribution
- * of tasks with abnormal "nice" values across CPUs the contribution that
- * each task makes to its run queue's load is weighted according to its
- * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
- * scaled version of the new time slice allocation that they receive on time
- * slice expiry etc.
- */
-
-#define WEIGHT_IDLEPRIO 3
-#define WMULT_IDLEPRIO 1431655765
-
-extern const int sched_prio_to_weight[40];
-extern const u32 sched_prio_to_wmult[40];
-
-/*
- * {de,en}queue flags:
- *
- * DEQUEUE_SLEEP - task is no longer runnable
- * ENQUEUE_WAKEUP - task just became runnable
- *
- * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
- * are in a known state which allows modification. Such pairs
- * should preserve as much state as possible.
- *
- * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
- * in the runqueue.
- *
- * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
- * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
- * ENQUEUE_MIGRATED - the task was migrated during wakeup
- *
- */
-
-#define DEQUEUE_SLEEP 0x01
-#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
-#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
-#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
-
-#define ENQUEUE_WAKEUP 0x01
-#define ENQUEUE_RESTORE 0x02
-#define ENQUEUE_MOVE 0x04
-#define ENQUEUE_NOCLOCK 0x08
-
-#define ENQUEUE_HEAD 0x10
-#define ENQUEUE_REPLENISH 0x20
-#ifdef CONFIG_SMP
-#define ENQUEUE_MIGRATED 0x40
-#else
-#define ENQUEUE_MIGRATED 0x00
-#endif
-
-#define RETRY_TASK ((void *)-1UL)
-
-struct sched_class {
- const struct sched_class *next;
-
- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
- void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
- void (*yield_task) (struct rq *rq);
- bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
-
- void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
-
- /*
- * It is the responsibility of the pick_next_task() method that will
- * return the next task to call put_prev_task() on the @prev task or
- * something equivalent.
- *
- * May return RETRY_TASK when it finds a higher prio class has runnable
- * tasks.
- */
- struct task_struct * (*pick_next_task)(struct rq *rq,
- struct task_struct *prev,
- struct rq_flags *rf);
- void (*put_prev_task)(struct rq *rq, struct task_struct *p);
-
-#ifdef CONFIG_SMP
- int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
- void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
-
- void (*task_woken)(struct rq *this_rq, struct task_struct *task);
-
- void (*set_cpus_allowed)(struct task_struct *p,
- const struct cpumask *newmask);
-
- void (*rq_online)(struct rq *rq);
- void (*rq_offline)(struct rq *rq);
-#endif
-
- void (*set_curr_task)(struct rq *rq);
- void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
- void (*task_fork)(struct task_struct *p);
- void (*task_dead)(struct task_struct *p);
-
- /*
- * The switched_from() call is allowed to drop rq->lock, therefore we
- * cannot assume the switched_from/switched_to pair is serliazed by
- * rq->lock. They are however serialized by p->pi_lock.
- */
- void (*switched_from)(struct rq *this_rq, struct task_struct *task);
- void (*switched_to) (struct rq *this_rq, struct task_struct *task);
- void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
- int oldprio);
-
- unsigned int (*get_rr_interval)(struct rq *rq,
- struct task_struct *task);
-
- void (*update_curr)(struct rq *rq);
-
-#define TASK_SET_GROUP 0
-#define TASK_MOVE_GROUP 1
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
- void (*task_change_group)(struct task_struct *p, int type);
-#endif
-};
-
-static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
-{
- prev->sched_class->put_prev_task(rq, prev);
-}
-
-static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
-{
- curr->sched_class->set_curr_task(rq);
-}
-
-#ifdef CONFIG_SMP
-#define sched_class_highest (&stop_sched_class)
-#else
-#define sched_class_highest (&dl_sched_class)
-#endif
-#define for_each_class(class) \
- for (class = sched_class_highest; class; class = class->next)
-
-extern const struct sched_class stop_sched_class;
-extern const struct sched_class dl_sched_class;
-extern const struct sched_class rt_sched_class;
-extern const struct sched_class fair_sched_class;
-extern const struct sched_class idle_sched_class;
-
-
-#ifdef CONFIG_SMP
-
-extern void update_group_capacity(struct sched_domain *sd, int cpu);
-
-extern void trigger_load_balance(struct rq *rq);
-
-extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
-
-#endif
-
-#ifdef CONFIG_CPU_IDLE
-static inline void idle_set_state(struct rq *rq,
- struct cpuidle_state *idle_state)
-{
- rq->idle_state = idle_state;
-}
-
-static inline struct cpuidle_state *idle_get_state(struct rq *rq)
-{
- SCHED_WARN_ON(!rcu_read_lock_held());
-
- return rq->idle_state;
-}
-#else
-static inline void idle_set_state(struct rq *rq,
- struct cpuidle_state *idle_state)
-{
-}
-
-static inline struct cpuidle_state *idle_get_state(struct rq *rq)
-{
- return NULL;
-}
-#endif
-
-extern void schedule_idle(void);
-
-extern void sysrq_sched_debug_show(void);
-extern void sched_init_granularity(void);
-extern void update_max_interval(void);
-
-extern void init_sched_dl_class(void);
-extern void init_sched_rt_class(void);
-extern void init_sched_fair_class(void);
-
-extern void reweight_task(struct task_struct *p, int prio);
-
-extern void resched_curr(struct rq *rq);
-extern void resched_cpu(int cpu);
-
-extern struct rt_bandwidth def_rt_bandwidth;
-extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
-
-extern struct dl_bandwidth def_dl_bandwidth;
-extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
-extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
-extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
-extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
-
-#define BW_SHIFT 20
-#define BW_UNIT (1 << BW_SHIFT)
-#define RATIO_SHIFT 8
-unsigned long to_ratio(u64 period, u64 runtime);
-
-extern void init_entity_runnable_average(struct sched_entity *se);
-extern void post_init_entity_util_avg(struct sched_entity *se);
-
-#ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(struct rq *rq);
-extern int __init sched_tick_offload_init(void);
-
-/*
- * Tick may be needed by tasks in the runqueue depending on their policy and
- * requirements. If tick is needed, lets send the target an IPI to kick it out of
- * nohz mode if necessary.
- */
-static inline void sched_update_tick_dependency(struct rq *rq)
-{
- int cpu;
-
- if (!tick_nohz_full_enabled())
- return;
-
- cpu = cpu_of(rq);
-
- if (!tick_nohz_full_cpu(cpu))
- return;
-
- if (sched_can_stop_tick(rq))
- tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
- else
- tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
-}
-#else
-static inline int sched_tick_offload_init(void) { return 0; }
-static inline void sched_update_tick_dependency(struct rq *rq) { }
-#endif
-
-static inline void add_nr_running(struct rq *rq, unsigned count)
-{
- unsigned prev_nr = rq->nr_running;
-
- rq->nr_running = prev_nr + count;
-
- if (prev_nr < 2 && rq->nr_running >= 2) {
-#ifdef CONFIG_SMP
- if (!rq->rd->overload)
- rq->rd->overload = true;
-#endif
- }
-
- sched_update_tick_dependency(rq);
-}
-
-static inline void sub_nr_running(struct rq *rq, unsigned count)
-{
- rq->nr_running -= count;
- /* Check if we still need preemption */
- sched_update_tick_dependency(rq);
-}
-
-extern void update_rq_clock(struct rq *rq);
-
-extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
-extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
-
-extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
-
-extern const_debug unsigned int sysctl_sched_nr_migrate;
-extern const_debug unsigned int sysctl_sched_migration_cost;
-
-#ifdef CONFIG_SCHED_HRTICK
-
-/*
- * Use hrtick when:
- * - enabled by features
- * - hrtimer is actually high res
- */
-static inline int hrtick_enabled(struct rq *rq)
-{
- if (!sched_feat(HRTICK))
- return 0;
- if (!cpu_active(cpu_of(rq)))
- return 0;
- return hrtimer_is_hres_active(&rq->hrtick_timer);
-}
-
-void hrtick_start(struct rq *rq, u64 delay);
-
-#else
-
-static inline int hrtick_enabled(struct rq *rq)
-{
- return 0;
-}
-
-#endif /* CONFIG_SCHED_HRTICK */
-
-#ifndef arch_scale_freq_capacity
-static __always_inline
-unsigned long arch_scale_freq_capacity(int cpu)
-{
- return SCHED_CAPACITY_SCALE;
-}
-#endif
-
-#ifdef CONFIG_SMP
-#ifndef arch_scale_cpu_capacity
-static __always_inline
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
-{
- if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
- return sd->smt_gain / sd->span_weight;
-
- return SCHED_CAPACITY_SCALE;
-}
-#endif
-#else
-#ifndef arch_scale_cpu_capacity
-static __always_inline
-unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu)
-{
- return SCHED_CAPACITY_SCALE;
-}
-#endif
-#endif
-
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(rq->lock);
@@ -1854,8 +1248,745 @@
raw_spin_unlock(&rq->lock);
}
+static inline struct rq *
+this_rq_lock_irq(struct rq_flags *rf)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ local_irq_disable();
+ rq = this_rq();
+ rq_lock(rq, rf);
+ return rq;
+}
+
+#ifdef CONFIG_NUMA
+enum numa_topology_type {
+ NUMA_DIRECT,
+ NUMA_GLUELESS_MESH,
+ NUMA_BACKPLANE,
+};
+extern enum numa_topology_type sched_numa_topology_type;
+extern int sched_max_numa_distance;
+extern bool find_numa_distance(int distance);
+extern void sched_init_numa(void);
+extern void sched_domains_numa_masks_set(unsigned int cpu);
+extern void sched_domains_numa_masks_clear(unsigned int cpu);
+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
+#else
+static inline void sched_init_numa(void) { }
+static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
+static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+ return nr_cpu_ids;
+}
+#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+/* The regions in numa_faults array from task_struct */
+enum numa_faults_stats {
+ NUMA_MEM = 0,
+ NUMA_CPU,
+ NUMA_MEMBUF,
+ NUMA_CPUBUF
+};
+extern void sched_setnuma(struct task_struct *p, int node);
+extern int migrate_task_to(struct task_struct *p, int cpu);
+extern int migrate_swap(struct task_struct *p, struct task_struct *t,
+ int cpu, int scpu);
+extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
+#else
+static inline void
+init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
#ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPT
+
+static inline void
+queue_balance_callback(struct rq *rq,
+ struct callback_head *head,
+ void (*func)(struct rq *rq))
+{
+ lockdep_assert_held(&rq->lock);
+
+ if (unlikely(head->next))
+ return;
+
+ head->func = (void (*)(struct callback_head *))func;
+ head->next = rq->balance_callback;
+ rq->balance_callback = head;
+}
+
+extern void sched_ttwu_pending(void);
+
+#define rcu_dereference_check_sched_domain(p) \
+ rcu_dereference_check((p), \
+ lockdep_is_held(&sched_domains_mutex))
+
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See destroy_sched_domains: call_rcu for details.
+ *
+ * The domain tree of any CPU may only be accessed from within
+ * preempt-disabled sections.
+ */
+#define for_each_domain(cpu, __sd) \
+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
+ __sd; __sd = __sd->parent)
+
+#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
+
+/**
+ * highest_flag_domain - Return highest sched_domain containing flag.
+ * @cpu: The CPU whose highest level of sched domain is to
+ * be returned.
+ * @flag: The flag to check for the highest sched_domain
+ * for the given CPU.
+ *
+ * Returns the highest sched_domain of a CPU which contains the given flag.
+ */
+static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
+{
+ struct sched_domain *sd, *hsd = NULL;
+
+ for_each_domain(cpu, sd) {
+ if (!(sd->flags & flag))
+ break;
+ hsd = sd;
+ }
+
+ return hsd;
+}
+
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+ struct sched_domain *sd;
+
+ for_each_domain(cpu, sd) {
+ if (sd->flags & flag)
+ break;
+ }
+
+ return sd;
+}
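
For illustration only: a toy, self-contained model of how these helpers walk the domain hierarchy. The flag values and the hand-built three-level parent chain are assumptions; RCU and the per-CPU lookup are omitted.

/* Userspace sketch: highest_flag_domain() over a parent-linked chain. */
#include <stdio.h>

#define SD_FLAG_A	0x1
#define SD_FLAG_B	0x2

struct sched_domain { int flags; struct sched_domain *parent; };

/* Highest (closest to the root) domain still carrying @flag. */
static struct sched_domain *highest_flag_domain(struct sched_domain *sd, int flag)
{
	struct sched_domain *hsd = NULL;

	for (; sd; sd = sd->parent) {
		if (!(sd->flags & flag))
			break;
		hsd = sd;
	}
	return hsd;
}

int main(void)
{
	struct sched_domain top  = { .flags = SD_FLAG_B, .parent = NULL };
	struct sched_domain mid  = { .flags = SD_FLAG_A | SD_FLAG_B, .parent = &top };
	struct sched_domain base = { .flags = SD_FLAG_A | SD_FLAG_B, .parent = &mid };

	/* SD_FLAG_A stops at "mid"; SD_FLAG_B is carried up to "top". */
	printf("%d %d\n",
	       highest_flag_domain(&base, SD_FLAG_A) == &mid,
	       highest_flag_domain(&base, SD_FLAG_B) == &top);	/* 1 1 */
	return 0;
}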
+
+DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
+DECLARE_PER_CPU(int, sd_llc_size);
+DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
+DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
+DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
+DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
+extern struct static_key_false sched_asym_cpucapacity;
+
+struct sched_group_capacity {
+ atomic_t ref;
+ /*
+ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
+ * for a single CPU.
+ */
+ unsigned long capacity;
+ unsigned long min_capacity; /* Min per-CPU capacity in group */
+ unsigned long max_capacity; /* Max per-CPU capacity in group */
+ unsigned long next_update;
+ int imbalance; /* XXX unrelated to capacity but shared group state */
+
+#ifdef CONFIG_SCHED_DEBUG
+ int id;
+#endif
+
+ unsigned long cpumask[0]; /* Balance mask */
+};
+
+struct sched_group {
+ struct sched_group *next; /* Must be a circular list */
+ atomic_t ref;
+
+ unsigned int group_weight;
+ struct sched_group_capacity *sgc;
+ int asym_prefer_cpu; /* CPU of highest priority in group */
+
+ /*
+ * The CPUs this group covers.
+ *
+ * NOTE: this field is variable length. (Allocated dynamically
+ * by attaching extra space to the end of the structure,
+ * depending on how many CPUs the kernel has booted up with)
+ */
+ unsigned long cpumask[0];
+};
+
+static inline struct cpumask *sched_group_span(struct sched_group *sg)
+{
+ return to_cpumask(sg->cpumask);
+}
+
+/*
+ * See build_balance_mask().
+ */
+static inline struct cpumask *group_balance_mask(struct sched_group *sg)
+{
+ return to_cpumask(sg->sgc->cpumask);
+}
+
+/**
+ * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
+ * @group: The group whose first CPU is to be returned.
+ */
+static inline unsigned int group_first_cpu(struct sched_group *group)
+{
+ return cpumask_first(sched_group_span(group));
+}
+
+extern int group_balance_cpu(struct sched_group *sg);
+
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+void register_sched_domain_sysctl(void);
+void dirty_sched_domain_sysctl(int cpu);
+void unregister_sched_domain_sysctl(void);
+#else
+static inline void register_sched_domain_sysctl(void)
+{
+}
+static inline void dirty_sched_domain_sysctl(int cpu)
+{
+}
+static inline void unregister_sched_domain_sysctl(void)
+{
+}
+#endif
+
+extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+
+#else
+
+static inline void sched_ttwu_pending(void) { }
+
+static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+
+#endif /* CONFIG_SMP */
+
+#include "stats.h"
+#include "autogroup.h"
+
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this task belongs.
+ *
+ * We cannot use task_css() and friends because the cgroup subsystem
+ * changes that value before the cgroup_subsys::attach() method is called,
+ * therefore we cannot pin it and might observe the wrong value.
+ *
+ * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
+ * core changes this before calling sched_move_task().
+ *
+ * Instead we use a 'copy' which is updated from sched_move_task() while
+ * holding both task_struct::pi_lock and rq::lock.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ return p->sched_task_group;
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
+ struct task_group *tg = task_group(p);
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
+ p->se.cfs_rq = tg->cfs_rq[cpu];
+ p->se.parent = tg->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ p->rt.rt_rq = tg->rt_rq[cpu];
+ p->rt.parent = tg->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+ set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+ /*
+ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+ * successfully executed on another CPU. We must ensure that updates of
+ * per-task data have been completed by this moment.
+ */
+ smp_wmb();
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ WRITE_ONCE(p->cpu, cpu);
+#else
+ WRITE_ONCE(task_thread_info(p)->cpu, cpu);
+#endif
+ p->wake_cpu = cpu;
+#endif
+}
+
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# include <linux/static_key.h>
+# define const_debug __read_mostly
+#else
+# define const_debug const
+#endif
+
+#define SCHED_FEAT(name, enabled) \
+ __SCHED_FEAT_##name ,
+
+enum {
+#include "features.h"
+ __SCHED_FEAT_NR,
+};
+
+#undef SCHED_FEAT
+
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
+
+/*
+ * To support run-time toggling of sched features, all the translation units
+ * (but core.c) reference the sysctl_sched_features defined in core.c.
+ */
+extern const_debug unsigned int sysctl_sched_features;
+
+#define SCHED_FEAT(name, enabled) \
+static __always_inline bool static_branch_##name(struct static_key *key) \
+{ \
+ return static_key_##enabled(key); \
+}
+
+#include "features.h"
+#undef SCHED_FEAT
+
+extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
+#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
+
+#else /* !(SCHED_DEBUG && CONFIG_JUMP_LABEL) */
+
+/*
+ * Each translation unit has its own copy of sysctl_sched_features to allow
+ * constants propagation at compile time and compiler optimization based on
+ * features default.
+ */
+#define SCHED_FEAT(name, enabled) \
+ (1UL << __SCHED_FEAT_##name) * enabled |
+static const_debug __maybe_unused unsigned int sysctl_sched_features =
+#include "features.h"
+ 0;
+#undef SCHED_FEAT
+
+#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
+
+#endif /* SCHED_DEBUG && CONFIG_JUMP_LABEL */
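
For illustration only: a standalone analogue of the SCHED_FEAT()/features.h double expansion, with a made-up FEATURE_LIST standing in for the real features.h include. The same list is expanded once into the enum and once into the default bitmask, mirroring the non-jump-label fallback above.

/* Userspace sketch: X-macro expansion of a feature list. */
#include <stdio.h>

#define FEATURE_LIST(F)				\
	F(GENTLE_FAIR_SLEEPERS, 1)		\
	F(START_DEBIT, 1)			\
	F(HRTICK, 0)

#define F_ENUM(name, enabled)	__FEAT_##name,
enum { FEATURE_LIST(F_ENUM) __FEAT_NR };
#undef F_ENUM

#define F_MASK(name, enabled)	(1UL << __FEAT_##name) * (enabled) |
static const unsigned long features = FEATURE_LIST(F_MASK) 0;
#undef F_MASK

#define feat_enabled(x)	(!!(features & (1UL << __FEAT_##x)))

int main(void)
{
	printf("START_DEBIT=%d HRTICK=%d\n",
	       feat_enabled(START_DEBIT), feat_enabled(HRTICK));	/* 1 0 */
	return 0;
}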
+
+extern struct static_key_false sched_numa_balancing;
+extern struct static_key_false sched_schedstats;
+
+static inline u64 global_rt_period(void)
+{
+ return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
+}
+
+static inline u64 global_rt_runtime(void)
+{
+ if (sysctl_sched_rt_runtime < 0)
+ return RUNTIME_INF;
+
+ return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+}
+
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+ return rq->curr == p;
+}
+
+static inline int task_running(struct rq *rq, struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+ return p->on_cpu;
+#else
+ return task_current(rq, p);
+#endif
+}
+
+static inline int task_on_rq_queued(struct task_struct *p)
+{
+ return p->on_rq == TASK_ON_RQ_QUEUED;
+}
+
+static inline int task_on_rq_migrating(struct task_struct *p)
+{
+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
+}
+
+/*
+ * wake flags
+ */
+#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
+#define WF_FORK 0x02 /* Child wakeup after fork */
+#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
+
+/*
+ * To aid in avoiding the subversion of "niceness" due to uneven distribution
+ * of tasks with abnormal "nice" values across CPUs the contribution that
+ * each task makes to its run queue's load is weighted according to its
+ * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
+ * scaled version of the new time slice allocation that they receive on time
+ * slice expiry etc.
+ */
+
+#define WEIGHT_IDLEPRIO 3
+#define WMULT_IDLEPRIO 1431655765
+
+extern const int sched_prio_to_weight[40];
+extern const u32 sched_prio_to_wmult[40];
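
As a side note, the WMULT constant above is the precomputed fixed-point inverse of the weight, roughly 2^32 / weight, so a multiplication replaces a division on the hot path. A quick standalone arithmetic check for the SCHED_IDLE pair (illustration only, not the kernel's table generator):

/* Userspace sketch: WMULT_IDLEPRIO ~= 2^32 / WEIGHT_IDLEPRIO. */
#include <stdio.h>

#define WEIGHT_IDLEPRIO	3
#define WMULT_IDLEPRIO	1431655765

int main(void)
{
	unsigned long long inv = (1ULL << 32) / WEIGHT_IDLEPRIO;

	printf("computed %llu, table value %u\n",
	       inv, (unsigned int)WMULT_IDLEPRIO);	/* both 1431655765 */
	return 0;
}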
+
+/*
+ * {de,en}queue flags:
+ *
+ * DEQUEUE_SLEEP - task is no longer runnable
+ * ENQUEUE_WAKEUP - task just became runnable
+ *
+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
+ * are in a known state which allows modification. Such pairs
+ * should preserve as much state as possible.
+ *
+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
+ * in the runqueue.
+ *
+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
+ * ENQUEUE_MIGRATED - the task was migrated during wakeup
+ *
+ */
+
+#define DEQUEUE_SLEEP 0x01
+#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
+#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
+#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
+
+#define ENQUEUE_WAKEUP 0x01
+#define ENQUEUE_RESTORE 0x02
+#define ENQUEUE_MOVE 0x04
+#define ENQUEUE_NOCLOCK 0x08
+
+#define ENQUEUE_HEAD 0x10
+#define ENQUEUE_REPLENISH 0x20
+#ifdef CONFIG_SMP
+#define ENQUEUE_MIGRATED 0x40
+#else
+#define ENQUEUE_MIGRATED 0x00
+#endif
+
+#define RETRY_TASK ((void *)-1UL)
+
+struct sched_class {
+ const struct sched_class *next;
+
+#ifdef CONFIG_UCLAMP_TASK
+ int uclamp_enabled;
+#endif
+
+ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
+ void (*yield_task) (struct rq *rq);
+ bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
+
+ void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
+
+ /*
+ * Both @prev and @rf are optional and may be NULL, in which case the
+ * caller must already have invoked put_prev_task(rq, prev, rf).
+ *
+ * Otherwise it is the responsibility of the pick_next_task() to call
+ * put_prev_task() on the @prev task or something equivalent, IFF it
+ * returns a next task.
+ *
+ * In that case (@rf != NULL) it may return RETRY_TASK when it finds a
+ * higher prio class has runnable tasks.
+ */
+ struct task_struct * (*pick_next_task)(struct rq *rq,
+ struct task_struct *prev,
+ struct rq_flags *rf);
+ void (*put_prev_task)(struct rq *rq, struct task_struct *p);
+ void (*set_next_task)(struct rq *rq, struct task_struct *p);
+
+#ifdef CONFIG_SMP
+ int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+ int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
+ void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
+
+ void (*task_woken)(struct rq *this_rq, struct task_struct *task);
+
+ void (*set_cpus_allowed)(struct task_struct *p,
+ const struct cpumask *newmask);
+
+ void (*rq_online)(struct rq *rq);
+ void (*rq_offline)(struct rq *rq);
+#endif
+
+ void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
+ void (*task_fork)(struct task_struct *p);
+ void (*task_dead)(struct task_struct *p);
+
+ /*
+ * The switched_from() call is allowed to drop rq->lock, therefore we
+ * cannot assume the switched_from/switched_to pair is serialized by
+ * rq->lock. They are however serialized by p->pi_lock.
+ */
+ void (*switched_from)(struct rq *this_rq, struct task_struct *task);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task);
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio);
+
+ unsigned int (*get_rr_interval)(struct rq *rq,
+ struct task_struct *task);
+
+ void (*update_curr)(struct rq *rq);
+
+#define TASK_SET_GROUP 0
+#define TASK_MOVE_GROUP 1
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ void (*task_change_group)(struct task_struct *p, int type);
+#endif
+};
+
+static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
+{
+ WARN_ON_ONCE(rq->curr != prev);
+ prev->sched_class->put_prev_task(rq, prev);
+}
+
+static inline void set_next_task(struct rq *rq, struct task_struct *next)
+{
+ WARN_ON_ONCE(rq->curr != next);
+ next->sched_class->set_next_task(rq, next);
+}
+
+#ifdef CONFIG_SMP
+#define sched_class_highest (&stop_sched_class)
+#else
+#define sched_class_highest (&dl_sched_class)
+#endif
+
+#define for_class_range(class, _from, _to) \
+ for (class = (_from); class != (_to); class = class->next)
+
+#define for_each_class(class) \
+ for_class_range(class, sched_class_highest, NULL)
+
+extern const struct sched_class stop_sched_class;
+extern const struct sched_class dl_sched_class;
+extern const struct sched_class rt_sched_class;
+extern const struct sched_class fair_sched_class;
+extern const struct sched_class idle_sched_class;
+
+static inline bool sched_stop_runnable(struct rq *rq)
+{
+ return rq->stop && task_on_rq_queued(rq->stop);
+}
+
+static inline bool sched_dl_runnable(struct rq *rq)
+{
+ return rq->dl.dl_nr_running > 0;
+}
+
+static inline bool sched_rt_runnable(struct rq *rq)
+{
+ return rq->rt.rt_queued > 0;
+}
+
+static inline bool sched_fair_runnable(struct rq *rq)
+{
+ return rq->cfs.nr_running > 0;
+}
+
+#ifdef CONFIG_SMP
+
+extern void update_group_capacity(struct sched_domain *sd, int cpu);
+
+extern void trigger_load_balance(struct rq *rq);
+
+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
+
+#endif
+
+#ifdef CONFIG_CPU_IDLE
+static inline void idle_set_state(struct rq *rq,
+ struct cpuidle_state *idle_state)
+{
+ rq->idle_state = idle_state;
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+ SCHED_WARN_ON(!rcu_read_lock_held());
+
+ return rq->idle_state;
+}
+#else
+static inline void idle_set_state(struct rq *rq,
+ struct cpuidle_state *idle_state)
+{
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+ return NULL;
+}
+#endif
+
+extern void schedule_idle(void);
+
+extern void sysrq_sched_debug_show(void);
+extern void sched_init_granularity(void);
+extern void update_max_interval(void);
+
+extern void init_sched_dl_class(void);
+extern void init_sched_rt_class(void);
+extern void init_sched_fair_class(void);
+
+extern void reweight_task(struct task_struct *p, int prio);
+
+extern void resched_curr(struct rq *rq);
+extern void resched_cpu(int cpu);
+
+extern struct rt_bandwidth def_rt_bandwidth;
+extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+
+extern struct dl_bandwidth def_dl_bandwidth;
+extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
+extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
+
+#define BW_SHIFT 20
+#define BW_UNIT (1 << BW_SHIFT)
+#define RATIO_SHIFT 8
+unsigned long to_ratio(u64 period, u64 runtime);
+
+extern void init_entity_runnable_average(struct sched_entity *se);
+extern void post_init_entity_util_avg(struct task_struct *p);
+
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+extern int __init sched_tick_offload_init(void);
+
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, let's send the target an IPI to kick it out of
+ * nohz mode if necessary.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+ int cpu;
+
+ if (!tick_nohz_full_enabled())
+ return;
+
+ cpu = cpu_of(rq);
+
+ if (!tick_nohz_full_cpu(cpu))
+ return;
+
+ if (sched_can_stop_tick(rq))
+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
+ else
+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#else
+static inline int sched_tick_offload_init(void) { return 0; }
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
+static inline void add_nr_running(struct rq *rq, unsigned count)
+{
+ unsigned prev_nr = rq->nr_running;
+
+ rq->nr_running = prev_nr + count;
+
+#ifdef CONFIG_SMP
+ if (prev_nr < 2 && rq->nr_running >= 2) {
+ if (!READ_ONCE(rq->rd->overload))
+ WRITE_ONCE(rq->rd->overload, 1);
+ }
+#endif
+
+ sched_update_tick_dependency(rq);
+}
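
For illustration only: a simplified userspace version of the overload transition tracked above (no locking, no READ_ONCE/WRITE_ONCE). The root-domain flag is only raised when a runqueue crosses from fewer than two to two or more runnable tasks.

/* Userspace sketch: overload is set on the 1 -> 2 nr_running transition. */
#include <stdio.h>

struct root_domain { int overload; };
struct rq { unsigned int nr_running; struct root_domain *rd; };

static void add_nr_running(struct rq *rq, unsigned int count)
{
	unsigned int prev_nr = rq->nr_running;

	rq->nr_running = prev_nr + count;
	if (prev_nr < 2 && rq->nr_running >= 2)
		rq->rd->overload = 1;
}

int main(void)
{
	struct root_domain rd = { .overload = 0 };
	struct rq rq = { .nr_running = 0, .rd = &rd };

	add_nr_running(&rq, 1);
	printf("overload=%d\n", rd.overload);	/* still 0 */
	add_nr_running(&rq, 1);
	printf("overload=%d\n", rd.overload);	/* now 1 */
	return 0;
}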
+
+static inline void sub_nr_running(struct rq *rq, unsigned count)
+{
+ rq->nr_running -= count;
+ /* Check if we still need preemption */
+ sched_update_tick_dependency(rq);
+}
+
+extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
+extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+
+extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+
+extern const_debug unsigned int sysctl_sched_nr_migrate;
+extern const_debug unsigned int sysctl_sched_migration_cost;
+
+#ifdef CONFIG_SCHED_HRTICK
+
+/*
+ * Use hrtick when:
+ * - enabled by features
+ * - hrtimer is actually high res
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+ if (!sched_feat(HRTICK))
+ return 0;
+ if (!cpu_active(cpu_of(rq)))
+ return 0;
+ return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
+void hrtick_start(struct rq *rq, u64 delay);
+
+#else
+
+static inline int hrtick_enabled(struct rq *rq)
+{
+ return 0;
+}
+
+#endif /* CONFIG_SCHED_HRTICK */
+
+#ifndef arch_scale_freq_capacity
+static __always_inline
+unsigned long arch_scale_freq_capacity(int cpu)
+{
+ return SCHED_CAPACITY_SCALE;
+}
+#endif
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPTION
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -1907,7 +2038,7 @@
return ret;
}
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
/*
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
@@ -2140,7 +2271,7 @@
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_CPU_FREQ
-DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
@@ -2177,6 +2308,48 @@
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
+#ifdef CONFIG_UCLAMP_TASK
+enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+
+static __always_inline
+unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
+ struct task_struct *p)
+{
+ unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
+ unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+
+ if (p) {
+ min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
+ max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
+ }
+
+ /*
+ * Since CPU's {min,max}_util clamps are MAX aggregated considering
+ * RUNNABLE tasks with _different_ clamps, we can end up with an
+ * inversion. Fix it now when the clamps are applied.
+ */
+ if (unlikely(min_util >= max_util))
+ return min_util;
+
+ return clamp(util, min_util, max_util);
+}
+
+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+{
+ return uclamp_util_with(rq, util, NULL);
+}
+#else /* CONFIG_UCLAMP_TASK */
+static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
+ struct task_struct *p)
+{
+ return util;
+}
+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+{
+ return util;
+}
+#endif /* CONFIG_UCLAMP_TASK */
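
For illustration only: a standalone sketch of the clamp application above. When MAX aggregation leaves min_util >= max_util, the inversion is resolved in favour of min_util; otherwise the utilization is clamped between the two. The helper name and sample values are invented.

/* Userspace sketch: applying rq-level uclamp values to a utilization. */
#include <stdio.h>

static unsigned int apply_clamps(unsigned int util,
				 unsigned int min_util, unsigned int max_util)
{
	/* Clamp inversion: min wins, as in uclamp_util_with(). */
	if (min_util >= max_util)
		return min_util;
	if (util < min_util)
		return min_util;
	if (util > max_util)
		return max_util;
	return util;
}

int main(void)
{
	printf("%u\n", apply_clamps(100, 200, 800));	/* 200: boosted  */
	printf("%u\n", apply_clamps(900, 200, 800));	/* 800: capped   */
	printf("%u\n", apply_clamps(500, 700, 300));	/* 700: inverted */
	return 0;
}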
+
#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
# define arch_scale_freq_invariant() true
@@ -2185,7 +2358,34 @@
# define arch_scale_freq_invariant() false
#endif
+#ifdef CONFIG_SMP
+static inline unsigned long capacity_orig_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity_orig;
+}
+#endif
+
+/**
+ * enum schedutil_type - CPU utilization type
+ * @FREQUENCY_UTIL: Utilization used to select frequency
+ * @ENERGY_UTIL: Utilization used during energy calculation
+ *
+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
+ * need to be aggregated differently depending on the usage made of them. This
+ * enum is used within schedutil_cpu_util() to differentiate the types of
+ * utilization expected by the callers, and adjust the aggregation accordingly.
+ */
+enum schedutil_type {
+ FREQUENCY_UTIL,
+ ENERGY_UTIL,
+};
+
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+
+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+ unsigned long max, enum schedutil_type type,
+ struct task_struct *p);
+
static inline unsigned long cpu_bw_dl(struct rq *rq)
{
return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
@@ -2212,7 +2412,14 @@
{
return READ_ONCE(rq->avg_rt.util_avg);
}
-#endif
+#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+ unsigned long max, enum schedutil_type type,
+ struct task_struct *p)
+{
+ return 0;
+}
+#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
@@ -2241,3 +2448,51 @@
return util;
}
#endif
+
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+
+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
+
+DECLARE_STATIC_KEY_FALSE(sched_energy_present);
+
+static inline bool sched_energy_enabled(void)
+{
+ return static_branch_unlikely(&sched_energy_present);
+}
+
+#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
+
+#define perf_domain_span(pd) NULL
+static inline bool sched_energy_enabled(void) { return false; }
+
+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+
+#ifdef CONFIG_MEMBARRIER
+/*
+ * The scheduler provides memory barriers required by membarrier between:
+ * - prior user-space memory accesses and store to rq->membarrier_state,
+ * - store to rq->membarrier_state and following user-space memory accesses.
+ * In the same way it provides those guarantees around store to rq->curr.
+ */
+static inline void membarrier_switch_mm(struct rq *rq,
+ struct mm_struct *prev_mm,
+ struct mm_struct *next_mm)
+{
+ int membarrier_state;
+
+ if (prev_mm == next_mm)
+ return;
+
+ membarrier_state = atomic_read(&next_mm->membarrier_state);
+ if (READ_ONCE(rq->membarrier_state) == membarrier_state)
+ return;
+
+ WRITE_ONCE(rq->membarrier_state, membarrier_state);
+}
+#else
+static inline void membarrier_switch_mm(struct rq *rq,
+ struct mm_struct *prev_mm,
+ struct mm_struct *next_mm)
+{
+}
+#endif