Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index da3cc3a..ad3f82a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -40,7 +40,6 @@
#include <linux/ftrace.h>
#include <linux/syscalls.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
@@ -48,12 +47,11 @@
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
-#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION
@@ -64,30 +62,31 @@
#include "process.h"
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
+ const char *log_lvl)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
unsigned int fsindex, gsindex;
unsigned int ds, es;
- show_iret_regs(regs);
+ show_iret_regs(regs, log_lvl);
if (regs->orig_ax != -1)
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
else
pr_cont("\n");
- printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
- regs->ax, regs->bx, regs->cx);
- printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
- regs->dx, regs->si, regs->di);
- printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
- regs->bp, regs->r8, regs->r9);
- printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
- regs->r10, regs->r11, regs->r12);
- printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
- regs->r13, regs->r14, regs->r15);
+ printk("%sRAX: %016lx RBX: %016lx RCX: %016lx\n",
+ log_lvl, regs->ax, regs->bx, regs->cx);
+ printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n",
+ log_lvl, regs->dx, regs->si, regs->di);
+ printk("%sRBP: %016lx R08: %016lx R09: %016lx\n",
+ log_lvl, regs->bp, regs->r8, regs->r9);
+ printk("%sR10: %016lx R11: %016lx R12: %016lx\n",
+ log_lvl, regs->r10, regs->r11, regs->r12);
+ printk("%sR13: %016lx R14: %016lx R15: %016lx\n",
+ log_lvl, regs->r13, regs->r14, regs->r15);
if (mode == SHOW_REGS_SHORT)
return;
@@ -95,8 +94,8 @@
if (mode == SHOW_REGS_USER) {
rdmsrl(MSR_FS_BASE, fs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
- printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
- fs, shadowgs);
+ printk("%sFS: %016lx GS: %016lx\n",
+ log_lvl, fs, shadowgs);
return;
}
@@ -114,12 +113,12 @@
cr3 = __read_cr3();
cr4 = __read_cr4();
- printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs, fsindex, gs, gsindex, shadowgs);
- printk(KERN_DEFAULT "CS: %04lx DS: %04x ES: %04x CR0: %016lx\n", regs->cs, ds,
- es, cr0);
- printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
- cr4);
+ printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+ log_lvl, fs, fsindex, gs, gsindex, shadowgs);
+ printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
+ log_lvl, regs->cs, ds, es, cr0);
+ printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
+ log_lvl, cr2, cr3, cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
@@ -131,14 +130,14 @@
/* Only print out debug registers if they are in their non-default state. */
if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
(d6 == DR6_RESERVED) && (d7 == 0x400))) {
- printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
- d0, d1, d2);
- printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
- d3, d6, d7);
+ printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
+ log_lvl, d0, d1, d2);
+ printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
+ log_lvl, d3, d6, d7);
}
if (boot_cpu_has(X86_FEATURE_OSPKE))
- printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
+ printk("%sPKRU: %08x\n", log_lvl, read_pkru());
}
void release_thread(struct task_struct *dead_task)
@@ -152,6 +151,56 @@
};
/*
+ * Out of line to be protected from kprobes and tracing. If this would be
+ * traced or probed than any access to a per CPU variable happens with
+ * the wrong GS.
+ *
+ * It is not used on Xen paravirt. When paravirt support is needed, it
+ * needs to be renamed with native_ prefix.
+ */
+static noinstr unsigned long __rdgsbase_inactive(void)
+{
+ unsigned long gsbase;
+
+ lockdep_assert_irqs_disabled();
+
+ if (!static_cpu_has(X86_FEATURE_XENPV)) {
+ native_swapgs();
+ gsbase = rdgsbase();
+ native_swapgs();
+ } else {
+ instrumentation_begin();
+ rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ instrumentation_end();
+ }
+
+ return gsbase;
+}
+
+/*
+ * Out of line to be protected from kprobes and tracing. If this would be
+ * traced or probed than any access to a per CPU variable happens with
+ * the wrong GS.
+ *
+ * It is not used on Xen paravirt. When paravirt support is needed, it
+ * needs to be renamed with native_ prefix.
+ */
+static noinstr void __wrgsbase_inactive(unsigned long gsbase)
+{
+ lockdep_assert_irqs_disabled();
+
+ if (!static_cpu_has(X86_FEATURE_XENPV)) {
+ native_swapgs();
+ wrgsbase(gsbase);
+ native_swapgs();
+ } else {
+ instrumentation_begin();
+ wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ instrumentation_end();
+ }
+}
+
+/*
* Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
* not available. The goal is to be reasonably fast on non-FSGSBASE systems.
* It's forcibly inlined because it'll generate better code and this function
@@ -200,22 +249,35 @@
{
savesegment(fs, task->thread.fsindex);
savesegment(gs, task->thread.gsindex);
- save_base_legacy(task, task->thread.fsindex, FS);
- save_base_legacy(task, task->thread.gsindex, GS);
+ if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+ /*
+ * If FSGSBASE is enabled, we can't make any useful guesses
+ * about the base, and user code expects us to save the current
+ * value. Fortunately, reading the base directly is efficient.
+ */
+ task->thread.fsbase = rdfsbase();
+ task->thread.gsbase = __rdgsbase_inactive();
+ } else {
+ save_base_legacy(task, task->thread.fsindex, FS);
+ save_base_legacy(task, task->thread.gsindex, GS);
+ }
}
-#if IS_ENABLED(CONFIG_KVM)
/*
* While a process is running,current->thread.fsbase and current->thread.gsbase
- * may not match the corresponding CPU registers (see save_base_legacy()). KVM
- * wants an efficient way to save and restore FSBASE and GSBASE.
- * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE.
+ * may not match the corresponding CPU registers (see save_base_legacy()).
*/
-void save_fsgs_for_kvm(void)
+void current_save_fsgs(void)
{
+ unsigned long flags;
+
+ /* Interrupts need to be off for FSGSBASE */
+ local_irq_save(flags);
save_fsgs(current);
+ local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
+#if IS_ENABLED(CONFIG_KVM)
+EXPORT_SYMBOL_GPL(current_save_fsgs);
#endif
static __always_inline void loadseg(enum which_selector which,
@@ -280,14 +342,26 @@
static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
struct thread_struct *next)
{
- load_seg_legacy(prev->fsindex, prev->fsbase,
- next->fsindex, next->fsbase, FS);
- load_seg_legacy(prev->gsindex, prev->gsbase,
- next->gsindex, next->gsbase, GS);
+ if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+ /* Update the FS and GS selectors if they could have changed. */
+ if (unlikely(prev->fsindex || next->fsindex))
+ loadseg(FS, next->fsindex);
+ if (unlikely(prev->gsindex || next->gsindex))
+ loadseg(GS, next->gsindex);
+
+ /* Update the bases. */
+ wrfsbase(next->fsbase);
+ __wrgsbase_inactive(next->gsbase);
+ } else {
+ load_seg_legacy(prev->fsindex, prev->fsbase,
+ next->fsindex, next->fsbase, FS);
+ load_seg_legacy(prev->gsindex, prev->gsbase,
+ next->gsindex, next->gsbase, GS);
+ }
}
-static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
- unsigned short selector)
+unsigned long x86_fsgsbase_read_task(struct task_struct *task,
+ unsigned short selector)
{
unsigned short idx = selector >> 3;
unsigned long base;
@@ -329,13 +403,44 @@
return base;
}
+unsigned long x86_gsbase_read_cpu_inactive(void)
+{
+ unsigned long gsbase;
+
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ gsbase = __rdgsbase_inactive();
+ local_irq_restore(flags);
+ } else {
+ rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ }
+
+ return gsbase;
+}
+
+void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __wrgsbase_inactive(gsbase);
+ local_irq_restore(flags);
+ } else {
+ wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ }
+}
+
unsigned long x86_fsbase_read_task(struct task_struct *task)
{
unsigned long fsbase;
if (task == current)
fsbase = x86_fsbase_read_cpu();
- else if (task->thread.fsindex == 0)
+ else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
+ (task->thread.fsindex == 0))
fsbase = task->thread.fsbase;
else
fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
@@ -349,7 +454,8 @@
if (task == current)
gsbase = x86_gsbase_read_cpu_inactive();
- else if (task->thread.gsindex == 0)
+ else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
+ (task->thread.gsindex == 0))
gsbase = task->thread.gsbase;
else
gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
@@ -371,81 +477,6 @@
task->thread.gsbase = gsbase;
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
-{
- int err;
- struct pt_regs *childregs;
- struct fork_frame *fork_frame;
- struct inactive_task_frame *frame;
- struct task_struct *me = current;
-
- childregs = task_pt_regs(p);
- fork_frame = container_of(childregs, struct fork_frame, regs);
- frame = &fork_frame->frame;
-
- frame->bp = 0;
- frame->ret_addr = (unsigned long) ret_from_fork;
- p->thread.sp = (unsigned long) fork_frame;
- p->thread.io_bitmap_ptr = NULL;
-
- savesegment(gs, p->thread.gsindex);
- p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
- savesegment(fs, p->thread.fsindex);
- p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
- savesegment(es, p->thread.es);
- savesegment(ds, p->thread.ds);
- memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
- if (unlikely(p->flags & PF_KTHREAD)) {
- /* kernel thread */
- memset(childregs, 0, sizeof(struct pt_regs));
- frame->bx = sp; /* function */
- frame->r12 = arg;
- return 0;
- }
- frame->bx = 0;
- *childregs = *current_pt_regs();
-
- childregs->ax = 0;
- if (sp)
- childregs->sp = sp;
-
- err = -ENOMEM;
- if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
- p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES, GFP_KERNEL);
- if (!p->thread.io_bitmap_ptr) {
- p->thread.io_bitmap_max = 0;
- return -ENOMEM;
- }
- set_tsk_thread_flag(p, TIF_IO_BITMAP);
- }
-
- /*
- * Set a new TLS for the child thread?
- */
- if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_IA32_EMULATION
- if (in_ia32_syscall())
- err = do_set_thread_area(p, -1,
- (struct user_desc __user *)tls, 0);
- else
-#endif
- err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
- if (err)
- goto out;
- }
- err = 0;
-out:
- if (err && p->thread.io_bitmap_ptr) {
- kfree(p->thread.io_bitmap_ptr);
- p->thread.io_bitmap_max = 0;
- }
-
- return err;
-}
-
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp,
@@ -469,7 +500,6 @@
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
- force_iret();
}
void
@@ -505,15 +535,13 @@
{
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
- struct fpu *prev_fpu = &prev->fpu;
- struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_fpu, cpu);
+ switch_fpu_prepare(prev_p, cpu);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -565,24 +593,13 @@
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
- switch_fpu_finish(next_fpu);
+ switch_fpu_finish(next_p);
/* Reload sp0. */
update_task_stack(next_p);
switch_to_extra(prev_p, next_p);
-#ifdef CONFIG_XEN_PV
- /*
- * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
- * current_pt_regs()->flags may not match the current task's
- * intended IOPL. We need to switch it manually.
- */
- if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
- prev->iopl != next->iopl))
- xen_set_iopl_mask(next->iopl);
-#endif
-
if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
/*
* AMD CPUs have a misfeature: SYSRET sets the SS selector but