Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4abb5bd..7755035 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -21,12 +21,15 @@
#include <linux/seccomp.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
+#include <linux/refcount.h>
#include <linux/resource.h>
#include <linux/latencytop.h>
#include <linux/sched/prio.h>
+#include <linux/sched/types.h>
#include <linux/signal_types.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
+#include <linux/posix-timers.h>
#include <linux/rseq.h>
/* task_struct member predeclarations (sorted alphabetically): */
@@ -34,6 +37,7 @@
struct backing_dev_info;
struct bio_list;
struct blk_plug;
+struct capture_control;
struct cfs_rq;
struct fs_struct;
struct futex_pi_state;
@@ -47,6 +51,8 @@
struct rcu_node;
struct reclaim_state;
struct robust_list_head;
+struct root_domain;
+struct rq;
struct sched_attr;
struct sched_param;
struct seq_file;
@@ -175,7 +181,7 @@
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
*
* However, with slightly different timing the wakeup TASK_RUNNING store can
- * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
+ * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
* a problem either because that will result in one extra go around the loop
* and our @cond test will save the day.
*
@@ -217,6 +223,7 @@
extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
+asmlinkage void preempt_schedule_irq(void);
extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
@@ -240,27 +247,6 @@
#endif
};
-/**
- * struct task_cputime - collected CPU time counts
- * @utime: time spent in user mode, in nanoseconds
- * @stime: time spent in kernel mode, in nanoseconds
- * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
- *
- * This structure groups together three kinds of CPU time that are tracked for
- * threads and thread groups. Most things considering CPU time want to group
- * these counts together and treat all three of them in parallel.
- */
-struct task_cputime {
- u64 utime;
- u64 stime;
- unsigned long long sum_exec_runtime;
-};
-
-/* Alternate field names when used on cache expirations: */
-#define virt_exp utime
-#define prof_exp stime
-#define sched_exp sum_exec_runtime
-
enum vtime_state {
/* Task is sleeping or running in a CPU with VTIME inactive: */
VTIME_INACTIVE = 0,
@@ -279,6 +265,23 @@
u64 gtime;
};
+/*
+ * Utilization clamp constraints.
+ * @UCLAMP_MIN: Minimum utilization
+ * @UCLAMP_MAX: Maximum utilization
+ * @UCLAMP_CNT: Utilization clamp constraints count
+ */
+enum uclamp_id {
+ UCLAMP_MIN = 0,
+ UCLAMP_MAX,
+ UCLAMP_CNT
+};
+
+#ifdef CONFIG_SMP
+extern struct root_domain def_root_domain;
+extern struct mutex sched_domains_mutex;
+#endif
+
struct sched_info {
#ifdef CONFIG_SCHED_INFO
/* Cumulative counters: */
@@ -310,6 +313,10 @@
# define SCHED_FIXEDPOINT_SHIFT 10
# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
+/* Increase resolution of cpu_capacity calculations */
+# define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
+# define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
+
struct load_weight {
unsigned long weight;
u32 inv_weight;
@@ -355,12 +362,6 @@
* For cfs_rq, it is the aggregated load_avg of all runnable and
* blocked sched_entities.
*
- * load_avg may also take frequency scaling into account:
- *
- * load_avg = runnable% * scale_load_down(load) * freq%
- *
- * where freq% is the CPU frequency normalized to the highest frequency.
- *
* [util_avg definition]
*
* util_avg = running% * SCHED_CAPACITY_SCALE
@@ -369,17 +370,14 @@
* a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
* and blocked sched_entities.
*
- * util_avg may also factor frequency scaling and CPU capacity scaling:
+ * load_avg and util_avg don't direcly factor frequency scaling and CPU
+ * capacity scaling. The scaling is done through the rq_clock_pelt that
+ * is used for computing those signals (see update_rq_clock_pelt())
*
- * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
- *
- * where freq% is the same as above, and capacity% is the CPU capacity
- * normalized to the greatest capacity (due to uarch differences, etc).
- *
- * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
- * themselves are in the range of [0, 1]. To do fixed point arithmetics,
- * we therefore scale them to as large a range as necessary. This is for
- * example reflected by util_avg's SCHED_CAPACITY_SCALE.
+ * N.B., the above ratios (runnable% and running%) themselves are in the
+ * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
+ * to as large a range as necessary. This is for example reflected by
+ * util_avg's SCHED_CAPACITY_SCALE.
*
* [Overflow issue]
*
@@ -514,7 +512,7 @@
/*
* Actual scheduling parameters. Initialized with the values above,
- * they are continously updated during task execution. Note that
+ * they are continuously updated during task execution. Note that
* the remaining runtime could be < 0 in case we are in overrun.
*/
s64 runtime; /* Remaining runtime for this instance */
@@ -567,14 +565,47 @@
struct hrtimer inactive_timer;
};
+#ifdef CONFIG_UCLAMP_TASK
+/* Number of utilization clamp buckets (shorter alias) */
+#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT
+
+/*
+ * Utilization clamp for a scheduling entity
+ * @value: clamp value "assigned" to a se
+ * @bucket_id: bucket index corresponding to the "assigned" value
+ * @active: the se is currently refcounted in a rq's bucket
+ * @user_defined: the requested clamp value comes from user-space
+ *
+ * The bucket_id is the index of the clamp bucket matching the clamp value
+ * which is pre-computed and stored to avoid expensive integer divisions from
+ * the fast path.
+ *
+ * The active bit is set whenever a task has got an "effective" value assigned,
+ * which can be different from the clamp value "requested" from user-space.
+ * This allows to know a task is refcounted in the rq's bucket corresponding
+ * to the "effective" bucket_id.
+ *
+ * The user_defined bit is set whenever a task has got a task-specific clamp
+ * value requested from userspace, i.e. the system defaults apply to this task
+ * just as a restriction. This allows to relax default clamps when a less
+ * restrictive task-specific value has been requested, thus allowing to
+ * implement a "nice" semantic. For example, a task running with a 20%
+ * default boost can still drop its own boosting to 0%.
+ */
+struct uclamp_se {
+ unsigned int value : bits_per(SCHED_CAPACITY_SCALE);
+ unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
+ unsigned int active : 1;
+ unsigned int user_defined : 1;
+};
+#endif /* CONFIG_UCLAMP_TASK */
+
union rcu_special {
struct {
u8 blocked;
u8 need_qs;
- u8 exp_need_qs;
-
- /* Otherwise the compiler can store garbage here: */
- u8 pad;
+ u8 exp_hint; /* Hint for performance. */
+ u8 deferred_qs;
} b; /* Bits. */
u32 s; /* Set of bits. */
};
@@ -608,7 +639,7 @@
randomized_struct_fields_start
void *stack;
- atomic_t usage;
+ refcount_t usage;
/* Per task flags (PF_*), defined further below: */
unsigned int flags;
unsigned int ptrace;
@@ -649,6 +680,13 @@
#endif
struct sched_dl_entity dl;
+#ifdef CONFIG_UCLAMP_TASK
+ /* Clamp values requested for a scheduling entity */
+ struct uclamp_se uclamp_req[UCLAMP_CNT];
+ /* Effective clamp values used for a scheduling entity */
+ struct uclamp_se uclamp[UCLAMP_CNT];
+#endif
+
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* List of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
@@ -660,7 +698,8 @@
unsigned int policy;
int nr_cpus_allowed;
- cpumask_t cpus_allowed;
+ const cpumask_t *cpus_ptr;
+ cpumask_t cpus_mask;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -710,6 +749,10 @@
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
unsigned sched_remote_wakeup:1;
+#ifdef CONFIG_PSI
+ unsigned sched_psi_wake_requeue:1;
+#endif
+
/* Force alignment to the next boundary: */
unsigned :0;
@@ -723,9 +766,6 @@
#endif
#ifdef CONFIG_MEMCG
unsigned in_user_fault:1;
-#ifdef CONFIG_MEMCG_KMEM
- unsigned memcg_kmem_skip_account:1;
-#endif
#endif
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
@@ -733,6 +773,8 @@
#ifdef CONFIG_CGROUPS
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
+ /* task is frozen/stopped (used by the cgroup freezer) */
+ unsigned frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
/* to be used once the psi infrastructure lands upstream. */
@@ -821,10 +863,8 @@
unsigned long min_flt;
unsigned long maj_flt;
-#ifdef CONFIG_POSIX_TIMERS
- struct task_cputime cputime_expires;
- struct list_head cpu_timers[3];
-#endif
+ /* Empty if CONFIG_POSIX_CPUTIMERS=n */
+ struct posix_cputimers posix_cputimers;
/* Process credentials: */
@@ -837,6 +877,11 @@
/* Effective (overridable) subjective task credentials (COW): */
const struct cred __rcu *cred;
+#ifdef CONFIG_KEYS
+ /* Cached requested key. */
+ struct key *cached_requested_key;
+#endif
+
/*
* executable name, excluding path.
*
@@ -879,8 +924,10 @@
struct callback_head *task_works;
- struct audit_context *audit_context;
+#ifdef CONFIG_AUDIT
#ifdef CONFIG_AUDITSYSCALL
+ struct audit_context *audit_context;
+#endif
kuid_t loginuid;
unsigned int sessionid;
#endif
@@ -912,6 +959,10 @@
struct mutex_waiter *blocked_on;
#endif
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ int non_block_count;
+#endif
+
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned long hardirq_enable_ip;
@@ -958,11 +1009,18 @@
struct io_context *io_context;
+#ifdef CONFIG_COMPACTION
+ struct capture_control *capture_control;
+#endif
/* Ptrace state: */
unsigned long ptrace_message;
- siginfo_t *last_siginfo;
+ kernel_siginfo_t *last_siginfo;
struct task_io_accounting ioac;
+#ifdef CONFIG_PSI
+ /* Pressure stall state */
+ unsigned int psi_flags;
+#endif
#ifdef CONFIG_TASK_XACCT
/* Accumulated RSS usage: */
u64 acct_rss_mem1;
@@ -985,7 +1043,7 @@
/* cg_list protected by css_set_lock and tsk->alloc_lock: */
struct list_head cg_list;
#endif
-#ifdef CONFIG_INTEL_RDT
+#ifdef CONFIG_X86_CPU_RESCTRL
u32 closid;
u32 rmid;
#endif
@@ -996,6 +1054,8 @@
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
+ struct mutex futex_exit_mutex;
+ unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
@@ -1023,7 +1083,15 @@
u64 last_sum_exec_runtime;
struct callback_head numa_work;
- struct numa_group *numa_group;
+ /*
+ * This pointer is only modified for current in syscall and
+ * pagefault context (and for tasks being destroyed), so it can be read
+ * from any of the following contexts:
+ * - RCU read-side critical section
+ * - current->numa_group from everywhere
+ * - task's runqueue locked, task not running
+ */
+ struct numa_group __rcu *numa_group;
/*
* numa_faults is an array split into four regions:
@@ -1055,7 +1123,6 @@
#ifdef CONFIG_RSEQ
struct rseq __user *rseq;
- u32 rseq_len;
u32 rseq_sig;
/*
* RmW on rseq_event_mask must be performed atomically
@@ -1066,7 +1133,10 @@
struct tlbflush_unmap_batch tlb_ubc;
- struct rcu_head rcu;
+ union {
+ refcount_t rcu_users;
+ struct rcu_head rcu;
+ };
/* Cache last used pipe for splice(): */
struct pipe_inode_info *splice_pipe;
@@ -1183,7 +1253,7 @@
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* A live task holds one reference: */
- atomic_t stack_refcount;
+ refcount_t stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCH
int patch_state;
@@ -1193,6 +1263,11 @@
void *security;
#endif
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ unsigned long lowest_stack;
+ unsigned long prev_lowest_stack;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
@@ -1369,7 +1444,6 @@
*/
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* Getting shut down */
-#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
@@ -1390,9 +1464,11 @@
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
-#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
+#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
+#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
-#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
+#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
@@ -1442,6 +1518,7 @@
#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
+#define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1470,6 +1547,10 @@
TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
+TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
+TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)
+
TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
@@ -1504,10 +1585,6 @@
}
#endif
-#ifndef cpu_relax_yield
-#define cpu_relax_yield() cpu_relax()
-#endif
-
extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
@@ -1683,7 +1760,7 @@
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
*/
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
extern int _cond_resched(void);
#else
static inline int _cond_resched(void) { return 0; }
@@ -1712,12 +1789,12 @@
/*
* Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
* but a general need for low latency)
*/
static inline int spin_needbreak(spinlock_t *lock)
{
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
return spin_is_contended(lock);
#else
return 0;
@@ -1737,9 +1814,9 @@
static inline unsigned int task_cpu(const struct task_struct *p)
{
#ifdef CONFIG_THREAD_INFO_IN_TASK
- return p->cpu;
+ return READ_ONCE(p->cpu);
#else
- return task_thread_info(p)->cpu;
+ return READ_ONCE(task_thread_info(p)->cpu);
#endif
}
@@ -1767,7 +1844,10 @@
* running or not.
*/
#ifndef vcpu_is_preempted
-# define vcpu_is_preempted(cpu) false
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return false;
+}
#endif
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
@@ -1841,12 +1921,10 @@
{
if (clone_flags & CLONE_THREAD) {
t->rseq = NULL;
- t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
} else {
t->rseq = current->rseq;
- t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
t->rseq_event_mask = current->rseq_event_mask;
}
@@ -1855,7 +1933,6 @@
static inline void rseq_execve(struct task_struct *t)
{
t->rseq = NULL;
- t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
}
@@ -1888,6 +1965,14 @@
#endif
+void __exit_umh(struct task_struct *tsk);
+
+static inline void exit_umh(struct task_struct *tsk)
+{
+ if (unlikely(tsk->flags & PF_UMH))
+ __exit_umh(tsk);
+}
+
#ifdef CONFIG_DEBUG_RSEQ
void rseq_syscall(struct pt_regs *regs);
@@ -1900,4 +1985,16 @@
#endif
+const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
+char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
+int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
+
+const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
+const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
+const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
+
+int sched_trace_rq_cpu(struct rq *rq);
+
+const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
+
#endif