Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5710b80..f996d1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -18,6 +18,7 @@
#include <linux/mutex.h>
#include <linux/plist.h>
#include <linux/hrtimer.h>
+#include <linux/irqflags.h>
#include <linux/seccomp.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
@@ -31,6 +32,8 @@
#include <linux/task_io_accounting.h>
#include <linux/posix-timers.h>
#include <linux/rseq.h>
+#include <linux/seqlock.h>
+#include <linux/kcsan.h>
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -60,6 +63,7 @@
struct signal_struct;
struct task_delay_info;
struct task_group;
+struct io_uring_task;
/*
* Task state bitmask. NOTE! These bits are also
@@ -113,10 +117,6 @@
#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
- (task->flags & PF_FROZEN) == 0 && \
- (task->state & TASK_NOLOAD) == 0)
-
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
/*
@@ -157,24 +157,24 @@
*
* for (;;) {
* set_current_state(TASK_UNINTERRUPTIBLE);
- * if (!need_sleep)
- * break;
+ * if (CONDITION)
+ * break;
*
* schedule();
* }
* __set_current_state(TASK_RUNNING);
*
* If the caller does not need such serialisation (because, for instance, the
- * condition test and condition change and wakeup are under the same lock) then
+ * CONDITION test and condition change and wakeup are under the same lock) then
* use __set_current_state().
*
* The above is typically ordered against the wakeup, which does:
*
- * need_sleep = false;
+ * CONDITION = 1;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
- * where wake_up_state() executes a full memory barrier before accessing the
- * task state.
+ * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
+ * accessing p->state.
*
* Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
@@ -250,16 +250,21 @@
enum vtime_state {
/* Task is sleeping or running in a CPU with VTIME inactive: */
VTIME_INACTIVE = 0,
- /* Task runs in userspace in a CPU with VTIME active: */
- VTIME_USER,
+ /* Task is idle */
+ VTIME_IDLE,
/* Task runs in kernelspace in a CPU with VTIME active: */
VTIME_SYS,
+ /* Task runs in userspace in a CPU with VTIME active: */
+ VTIME_USER,
+ /* Task runs as guests in a CPU with VTIME active: */
+ VTIME_GUEST,
};
struct vtime {
seqcount_t seqcount;
unsigned long long starttime;
enum vtime_state state;
+ unsigned int cpu;
u64 utime;
u64 stime;
u64 gtime;
@@ -343,36 +348,46 @@
* Only for tasks we track a moving average of the past instantaneous
* estimated utilization. This allows to absorb sporadic drops in utilization
* of an otherwise almost periodic task.
+ *
+ * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
+ * updates. When a task is dequeued, its util_est should not be updated if its
+ * util_avg has not been updated in the meantime.
+ * This information is mapped into the MSB bit of util_est.enqueued at dequeue
+ * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
+ * for a task) it is safe to use MSB.
*/
struct util_est {
unsigned int enqueued;
unsigned int ewma;
#define UTIL_EST_WEIGHT_SHIFT 2
+#define UTIL_AVG_UNCHANGED 0x80000000
} __attribute__((__aligned__(sizeof(u64))));
/*
- * The load_avg/util_avg accumulates an infinite geometric series
- * (see __update_load_avg() in kernel/sched/fair.c).
+ * The load/runnable/util_avg accumulates an infinite geometric series
+ * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
*
* [load_avg definition]
*
* load_avg = runnable% * scale_load_down(load)
*
- * where runnable% is the time ratio that a sched_entity is runnable.
- * For cfs_rq, it is the aggregated load_avg of all runnable and
- * blocked sched_entities.
+ * [runnable_avg definition]
+ *
+ * runnable_avg = runnable% * SCHED_CAPACITY_SCALE
*
* [util_avg definition]
*
* util_avg = running% * SCHED_CAPACITY_SCALE
*
- * where running% is the time ratio that a sched_entity is running on
- * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
- * and blocked sched_entities.
+ * where runnable% is the time ratio that a sched_entity is runnable and
+ * running% the time ratio that a sched_entity is running.
*
- * load_avg and util_avg don't direcly factor frequency scaling and CPU
- * capacity scaling. The scaling is done through the rq_clock_pelt that
- * is used for computing those signals (see update_rq_clock_pelt())
+ * For cfs_rq, they are the aggregated values of all runnable and blocked
+ * sched_entities.
+ *
+ * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
+ * capacity scaling. The scaling is done through the rq_clock_pelt that is used
+ * for computing those signals (see update_rq_clock_pelt())
*
* N.B., the above ratios (runnable% and running%) themselves are in the
* range of [0, 1]. To do fixed point arithmetics, we therefore scale them
@@ -396,11 +411,11 @@
struct sched_avg {
u64 last_update_time;
u64 load_sum;
- u64 runnable_load_sum;
+ u64 runnable_sum;
u32 util_sum;
u32 period_contrib;
unsigned long load_avg;
- unsigned long runnable_load_avg;
+ unsigned long runnable_avg;
unsigned long util_avg;
struct util_est util_est;
} ____cacheline_aligned;
@@ -444,7 +459,6 @@
struct sched_entity {
/* For load-balancing: */
struct load_weight load;
- unsigned long runnable_weight;
struct rb_node run_node;
struct list_head group_node;
unsigned int on_rq;
@@ -465,6 +479,8 @@
struct cfs_rq *cfs_rq;
/* rq "owned" by this entity/group: */
struct cfs_rq *my_q;
+ /* cached value of my_q->h_nr_running */
+ unsigned long runnable_weight;
#endif
#ifdef CONFIG_SMP
@@ -544,7 +560,6 @@
* overruns.
*/
unsigned int dl_throttled : 1;
- unsigned int dl_boosted : 1;
unsigned int dl_yielded : 1;
unsigned int dl_non_contending : 1;
unsigned int dl_overrun : 1;
@@ -563,6 +578,15 @@
* time.
*/
struct hrtimer inactive_timer;
+
+#ifdef CONFIG_RT_MUTEXES
+ /*
+ * Priority Inheritance. When a DEADLINE scheduling entity is boosted
+ * pi_se points to the donor, otherwise points to the dl_se it belongs
+ * to (the original one/itself).
+ */
+ struct sched_dl_entity *pi_se;
+#endif
};
#ifdef CONFIG_UCLAMP_TASK
@@ -605,7 +629,7 @@
u8 blocked;
u8 need_qs;
u8 exp_hint; /* Hint for performance. */
- u8 deferred_qs;
+ u8 need_mb; /* Readers need smp_mb(). */
} b; /* Bits. */
u32 s; /* Set of bits. */
};
@@ -645,8 +669,8 @@
unsigned int ptrace;
#ifdef CONFIG_SMP
- struct llist_node wake_entry;
int on_cpu;
+ struct __call_single_node wake_entry;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
unsigned int cpu;
@@ -681,9 +705,15 @@
struct sched_dl_entity dl;
#ifdef CONFIG_UCLAMP_TASK
- /* Clamp values requested for a scheduling entity */
+ /*
+ * Clamp values requested for a scheduling entity.
+ * Must be updated with task_rq_lock() held.
+ */
struct uclamp_se uclamp_req[UCLAMP_CNT];
- /* Effective clamp values used for a scheduling entity */
+ /*
+ * Effective clamp values used for a scheduling entity.
+ * Must be updated with task_rq_lock() held.
+ */
struct uclamp_se uclamp[UCLAMP_CNT];
#endif
@@ -716,6 +746,14 @@
struct list_head rcu_tasks_holdout_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
+#ifdef CONFIG_TASKS_TRACE_RCU
+ int trc_reader_nesting;
+ int trc_ipi_to_cpu;
+ union rcu_special trc_reader_special;
+ bool trc_reader_checked;
+ struct list_head trc_holdout_list;
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+
struct sched_info sched_info;
struct list_head tasks;
@@ -748,7 +786,6 @@
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
- unsigned sched_remote_wakeup:1;
#ifdef CONFIG_PSI
unsigned sched_psi_wake_requeue:1;
#endif
@@ -758,6 +795,21 @@
/* Unserialized, strictly 'current' */
+ /*
+ * This field must not be in the scheduler word above due to wakelist
+ * queueing no longer being serialized by p->on_cpu. However:
+ *
+ * p->XXX = X; ttwu()
+ * schedule() if (p->on_rq && ..) // false
+ * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true
+ * deactivate_task() ttwu_queue_wakelist())
+ * p->on_rq = 0; p->sched_remote_wakeup = Y;
+ *
+ * guarantees all stores of 'current' are visible before
+ * ->sched_remote_wakeup gets used, so it can be in this word.
+ */
+ unsigned sched_remote_wakeup:1;
+
/* Bit to tell LSMs we're in execve(): */
unsigned in_execve:1;
unsigned in_iowait:1;
@@ -777,9 +829,12 @@
unsigned frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
- /* to be used once the psi infrastructure lands upstream. */
unsigned use_memdelay:1;
#endif
+#ifdef CONFIG_PSI
+ /* Stalled due to lack of memory */
+ unsigned in_memstall:1;
+#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -857,7 +912,7 @@
u64 start_time;
/* Boot based time in nsecs: */
- u64 real_start_time;
+ u64 start_boottime;
/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
unsigned long min_flt;
@@ -866,6 +921,10 @@
/* Empty if CONFIG_POSIX_CPUTIMERS=n */
struct posix_cputimers posix_cputimers;
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+ struct posix_cputimers_work posix_cputimers_work;
+#endif
+
/* Process credentials: */
/* Tracer's credentials at attach: */
@@ -907,12 +966,16 @@
/* Open file information: */
struct files_struct *files;
+#ifdef CONFIG_IO_URING
+ struct io_uring_task *io_uring;
+#endif
+
/* Namespaces: */
struct nsproxy *nsproxy;
/* Signal handlers: */
struct signal_struct *signal;
- struct sighand_struct *sighand;
+ struct sighand_struct __rcu *sighand;
sigset_t blocked;
sigset_t real_blocked;
/* Restored if set_restore_sigmask() was used: */
@@ -964,19 +1027,12 @@
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
- unsigned int irq_events;
- unsigned long hardirq_enable_ip;
- unsigned long hardirq_disable_ip;
- unsigned int hardirq_enable_event;
- unsigned int hardirq_disable_event;
- int hardirqs_enabled;
- int hardirq_context;
- unsigned long softirq_disable_ip;
- unsigned long softirq_enable_ip;
- unsigned int softirq_disable_event;
- unsigned int softirq_enable_event;
+ struct irqtrace_events irqtrace;
+ unsigned int hardirq_threaded;
+ u64 hardirq_chain_key;
int softirqs_enabled;
int softirq_context;
+ int irq_config;
#endif
#ifdef CONFIG_LOCKDEP
@@ -987,7 +1043,7 @@
struct held_lock held_locks[MAX_LOCK_DEPTH];
#endif
-#ifdef CONFIG_UBSAN
+#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
unsigned int in_ubsan;
#endif
@@ -1033,7 +1089,7 @@
/* Protected by ->alloc_lock: */
nodemask_t mems_allowed;
/* Seqence number to catch updates: */
- seqcount_t mems_allowed_seq;
+ seqcount_spinlock_t mems_allowed_seq;
int cpuset_mem_spread_rotor;
int cpuset_slab_spread_rotor;
#endif
@@ -1175,6 +1231,17 @@
unsigned int kasan_depth;
#endif
+#ifdef CONFIG_KCSAN
+ struct kcsan_ctx kcsan_ctx;
+#ifdef CONFIG_TRACE_IRQFLAGS
+ struct irqtrace_events kcsan_save_irqtrace;
+#endif
+#endif
+
+#if IS_ENABLED(CONFIG_KUNIT)
+ struct kunit *kunit_test;
+#endif
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack: */
int curr_ret_stack;
@@ -1205,6 +1272,8 @@
#endif /* CONFIG_TRACING */
#ifdef CONFIG_KCOV
+ /* See kernel/kcov.c for more details. */
+
/* Coverage collection mode enabled for this task (0 if disabled): */
unsigned int kcov_mode;
@@ -1216,6 +1285,15 @@
/* KCOV descriptor wired with this task or NULL: */
struct kcov *kcov;
+
+ /* KCOV common handle for remote coverage collection: */
+ u64 kcov_handle;
+
+ /* KCOV sequence number: */
+ int kcov_sequence;
+
+ /* Collect coverage from softirq context: */
+ unsigned int kcov_softirq;
#endif
#ifdef CONFIG_MEMCG
@@ -1268,6 +1346,17 @@
unsigned long prev_lowest_stack;
#endif
+#ifdef CONFIG_X86_MCE
+ void __user *mce_vaddr;
+ __u64 mce_kflags;
+ u64 mce_addr;
+ __u64 mce_ripv : 1,
+ mce_whole_page : 1,
+ __mce_reserved : 62;
+ struct callback_head mce_kill_me;
+ int mce_count;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
@@ -1442,9 +1531,10 @@
/*
* Per process flags
*/
+#define PF_VCPU 0x00000001 /* I'm a virtual CPU */
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* Getting shut down */
-#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
+#define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */
@@ -1454,18 +1544,16 @@
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
-#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
#define PF_FROZEN 0x00010000 /* Frozen for system suspend */
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
-#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
+#define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to,
+ * I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
-#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
-#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
@@ -1500,7 +1588,7 @@
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
-static inline bool is_percpu_thread(void)
+static __always_inline bool is_percpu_thread(void)
{
#ifdef CONFIG_SMP
return (current->flags & PF_NO_SETAFFINITY) &&
@@ -1606,6 +1694,9 @@
extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
+extern void sched_set_fifo(struct task_struct *p);
+extern void sched_set_fifo_low(struct task_struct *p);
+extern void sched_set_normal(struct task_struct *p, int nice);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
extern struct task_struct *idle_task(int cpu);
@@ -1616,7 +1707,7 @@
*
* Return: 1 if @p is an idle task. 0 otherwise.
*/
-static inline bool is_idle_task(const struct task_struct *p)
+static __always_inline bool is_idle_task(const struct task_struct *p)
{
return !!(p->flags & PF_IDLE);
}
@@ -1694,7 +1785,15 @@
})
#ifdef CONFIG_SMP
-void scheduler_ipi(void);
+static __always_inline void scheduler_ipi(void)
+{
+ /*
+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+ * TIF_NEED_RESCHED remotely (for the first time) will also send
+ * this IPI.
+ */
+ preempt_fold_need_resched();
+}
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else
static inline void scheduler_ipi(void) { }
@@ -1965,14 +2064,6 @@
#endif
-void __exit_umh(struct task_struct *tsk);
-
-static inline void exit_umh(struct task_struct *tsk)
-{
- if (unlikely(tsk->flags & PF_UMH))
- __exit_umh(tsk);
-}
-
#ifdef CONFIG_DEBUG_RSEQ
void rseq_syscall(struct pt_regs *regs);
@@ -1994,6 +2085,8 @@
const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
int sched_trace_rq_cpu(struct rq *rq);
+int sched_trace_rq_cpu_capacity(struct rq *rq);
+int sched_trace_rq_nr_running(struct rq *rq);
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);