Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8c9b202..571220a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -82,6 +82,45 @@
}
EXPORT_SYMBOL(irq_fpu_usable);
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact and we can
+ * keep registers active.
+ *
+ * The legacy FNSAVE instruction cleared all FPU state
+ * unconditionally, so registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+int copy_fpregs_to_fpstate(struct fpu *fpu)
+{
+ if (likely(use_xsave())) {
+ copy_xregs_to_kernel(&fpu->state.xsave);
+
+ /*
+ * AVX512 state is tracked here because its use is
+ * known to slow the max clock speed of the core.
+ */
+ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+ fpu->avx512_timestamp = jiffies;
+ return 1;
+ }
+
+ if (likely(use_fxsr())) {
+ copy_fxregs_to_kernel(fpu);
+ return 1;
+ }
+
+ /*
+ * Legacy FPU register saving, FNSAVE always clears FPU registers,
+ * so we have to mark them inactive:
+ */
+ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
+
+ return 0;
+}
+EXPORT_SYMBOL(copy_fpregs_to_fpstate);
+
void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
preempt_disable();
@@ -298,15 +337,13 @@
}
/*
- * Clear FPU registers by setting them up from
- * the init fpstate:
+ * Clear FPU registers by setting them up from the init fpstate.
+ * Caller must do fpregs_[un]lock() around it.
*/
-static inline void copy_init_fpstate_to_fpregs(void)
+static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
{
- fpregs_lock();
-
if (use_xsave())
- copy_kernel_to_xregs(&init_fpstate.xsave, -1);
+ copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
else if (static_cpu_has(X86_FEATURE_FXSR))
copy_kernel_to_fxregs(&init_fpstate.fxsave);
else
@@ -314,9 +351,6 @@
if (boot_cpu_has(X86_FEATURE_OSPKE))
copy_init_pkru_to_fpregs();
-
- fpregs_mark_activate();
- fpregs_unlock();
}
/*
@@ -325,18 +359,40 @@
* Called by sys_execve(), by the signal handler code and by various
* error paths.
*/
-void fpu__clear(struct fpu *fpu)
+static void fpu__clear(struct fpu *fpu, bool user_only)
{
- WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */
+ WARN_ON_FPU(fpu != ¤t->thread.fpu);
- fpu__drop(fpu);
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
+ fpu__drop(fpu);
+ fpu__initialize(fpu);
+ return;
+ }
- /*
- * Make sure fpstate is cleared and initialized.
- */
- fpu__initialize(fpu);
- if (static_cpu_has(X86_FEATURE_FPU))
- copy_init_fpstate_to_fpregs();
+ fpregs_lock();
+
+ if (user_only) {
+ if (!fpregs_state_valid(fpu, smp_processor_id()) &&
+ xfeatures_mask_supervisor())
+ copy_kernel_to_xregs(&fpu->state.xsave,
+ xfeatures_mask_supervisor());
+ copy_init_fpstate_to_fpregs(xfeatures_mask_user());
+ } else {
+ copy_init_fpstate_to_fpregs(xfeatures_mask_all);
+ }
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+}
+
+void fpu__clear_user_states(struct fpu *fpu)
+{
+ fpu__clear(fpu, true);
+}
+
+void fpu__clear_all(struct fpu *fpu)
+{
+ fpu__clear(fpu, false);
}
/*
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index b271da0..701f196 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -5,7 +5,6 @@
#include <asm/fpu/internal.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
-#include <asm/cmdline.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
@@ -224,7 +223,8 @@
*/
u64 __init fpu__get_supported_xfeatures_mask(void)
{
- return XCNTXT_MASK;
+ return XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
/* Legacy code to initialize eager fpu mode. */
@@ -237,65 +237,11 @@
}
/*
- * We parse fpu parameters early because fpu__init_system() is executed
- * before parse_early_param().
- */
-static void __init fpu__init_parse_early_param(void)
-{
- char arg[128];
- char *argptr = arg;
- int arglen, res, bit;
-
-#ifdef CONFIG_X86_32
- if (cmdline_find_option_bool(boot_command_line, "no387"))
-#ifdef CONFIG_MATH_EMULATION
- setup_clear_cpu_cap(X86_FEATURE_FPU);
-#else
- pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
-#endif
-
- if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
- setup_clear_cpu_cap(X86_FEATURE_FXSR);
-#endif
-
- if (cmdline_find_option_bool(boot_command_line, "noxsave"))
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-
- if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-
- if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-
- arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
- if (arglen <= 0)
- return;
-
- pr_info("Clearing CPUID bits:");
- do {
- res = get_option(&argptr, &bit);
- if (res == 0 || res == 3)
- break;
-
- /* If the argument was too long, the last bit may be cut off */
- if (res == 1 && arglen >= sizeof(arg))
- break;
-
- if (bit >= 0 && bit < NCAPINTS * 32) {
- pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
- setup_clear_cpu_cap(bit);
- }
- } while (res == 2);
- pr_cont("\n");
-}
-
-/*
* Called on the boot CPU once per system bootup, to set up the initial
* FPU state that is later cloned into all processes:
*/
void __init fpu__init_system(struct cpuinfo_x86 *c)
{
- fpu__init_parse_early_param();
fpu__init_system_early_generic(c);
/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 68e1fb6..6bb8744 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -27,8 +27,7 @@
}
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct fpu *fpu = &target->thread.fpu;
@@ -38,8 +37,7 @@
fpu__prepare_read(fpu);
fpstate_sanitize_xstate(fpu);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fpu->state.fxsave, 0, -1);
+ return membuf_write(&to, &fpu->state.fxsave, sizeof(struct fxregs_state));
}
int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -74,12 +72,10 @@
}
int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct fpu *fpu = &target->thread.fpu;
struct xregs_state *xsave;
- int ret;
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -ENODEV;
@@ -89,10 +85,8 @@
fpu__prepare_read(fpu);
if (using_compacted_format()) {
- if (kbuf)
- ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
- else
- ret = copy_xstate_to_user(ubuf, xsave, pos, count);
+ copy_xstate_to_kernel(to, xsave);
+ return 0;
} else {
fpstate_sanitize_xstate(fpu);
/*
@@ -105,9 +99,8 @@
/*
* Copy the xstate memory layout.
*/
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+ return membuf_write(&to, xsave, fpu_user_xstate_size);
}
- return ret;
}
int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -139,7 +132,7 @@
} else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
if (!ret)
- ret = validate_xstate_header(&xsave->header);
+ ret = validate_user_xstate_header(&xsave->header);
}
/*
@@ -293,8 +286,7 @@
}
int fpregs_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct fpu *fpu = &target->thread.fpu;
struct user_i387_ia32_struct env;
@@ -302,23 +294,22 @@
fpu__prepare_read(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
+ return fpregs_soft_get(target, regset, to);
- if (!boot_cpu_has(X86_FEATURE_FXSR))
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fpu->state.fsave, 0,
- -1);
+ if (!boot_cpu_has(X86_FEATURE_FXSR)) {
+ return membuf_write(&to, &fpu->state.fsave,
+ sizeof(struct fregs_state));
+ }
fpstate_sanitize_xstate(fpu);
- if (kbuf && pos == 0 && count == sizeof(env)) {
- convert_from_fxsr(kbuf, target);
+ if (to.left == sizeof(env)) {
+ convert_from_fxsr(to.p, target);
return 0;
}
convert_from_fxsr(&env, target);
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
+ return membuf_write(&to, &env, sizeof(env));
}
int fpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -356,20 +347,4 @@
return ret;
}
-/*
- * FPU state for core dumps.
- * This is only used for a.out dumps now.
- * It is declared generically using elf_fpregset_t (which is
- * struct user_i387_struct) but is in fact only used for 32-bit
- * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
- */
-int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
-{
- struct task_struct *tsk = current;
-
- return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct),
- ufpu, NULL);
-}
-EXPORT_SYMBOL(dump_fpu);
-
#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index ab2f9c2..b7b92cd 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -170,14 +170,15 @@
ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
IS_ENABLED(CONFIG_IA32_EMULATION));
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
+ struct user_i387_ia32_struct fp;
+ fpregs_soft_get(current, NULL, (struct membuf){.p = &fp,
+ .left = sizeof(fp)});
+ return copy_to_user(buf, &fp, sizeof(fp)) ? -EFAULT : 0;
+ }
+
if (!access_ok(buf, size))
return -EACCES;
-
- if (!static_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_get(current, NULL, 0,
- sizeof(struct user_i387_ia32_struct), NULL,
- (struct _fpstate_32 __user *) buf) ? -1 : 1;
-
retry:
/*
* Load the FPU registers if they are not valid for the current task.
@@ -211,28 +212,27 @@
}
static inline void
-sanitize_restored_xstate(union fpregs_state *state,
- struct user_i387_ia32_struct *ia32_env,
- u64 xfeatures, int fx_only)
+sanitize_restored_user_xstate(union fpregs_state *state,
+ struct user_i387_ia32_struct *ia32_env,
+ u64 user_xfeatures, int fx_only)
{
struct xregs_state *xsave = &state->xsave;
struct xstate_header *header = &xsave->header;
if (use_xsave()) {
/*
- * Note: we don't need to zero the reserved bits in the
- * xstate_header here because we either didn't copy them at all,
- * or we checked earlier that they aren't set.
+ * Clear all feature bits which are not set in
+ * user_xfeatures and clear all extended features
+ * for fx_only mode.
*/
+ u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;
/*
- * Init the state that is not present in the memory
- * layout and not enabled by the OS.
+ * Supervisor state has to be preserved. The sigframe
+ * restore can only modify user features, i.e. @mask
+ * cannot contain them.
*/
- if (fx_only)
- header->xfeatures = XFEATURE_MASK_FPSSE;
- else
- header->xfeatures &= xfeatures;
+ header->xfeatures &= mask | xfeatures_mask_supervisor();
}
if (use_fxsr()) {
@@ -252,16 +252,24 @@
*/
static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
{
+ u64 init_bv;
+ int r;
+
if (use_xsave()) {
if (fx_only) {
- u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
- copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- return copy_user_to_fxregs(buf);
- } else {
- u64 init_bv = xfeatures_mask & ~xbv;
- if (unlikely(init_bv))
+ init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
+
+ r = copy_user_to_fxregs(buf);
+ if (!r)
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- return copy_user_to_xregs(buf, xbv);
+ return r;
+ } else {
+ init_bv = xfeatures_mask_user() & ~xbv;
+
+ r = copy_user_to_xregs(buf, xbv);
+ if (!r && unlikely(init_bv))
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
+ return r;
}
} else if (use_fxsr()) {
return copy_user_to_fxregs(buf);
@@ -277,7 +285,7 @@
struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
- u64 xfeatures = 0;
+ u64 user_xfeatures = 0;
int fx_only = 0;
int ret = 0;
@@ -285,7 +293,7 @@
IS_ENABLED(CONFIG_IA32_EMULATION));
if (!buf) {
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
return 0;
}
@@ -314,32 +322,14 @@
trace_x86_fpu_xstate_check_failed(fpu);
} else {
state_size = fx_sw_user.xstate_size;
- xfeatures = fx_sw_user.xfeatures;
+ user_xfeatures = fx_sw_user.xfeatures;
}
}
- /*
- * The current state of the FPU registers does not matter. By setting
- * TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate
- * is not modified on context switch and that the xstate is considered
- * to be loaded again on return to userland (overriding last_cpu avoids
- * the optimisation).
- */
- set_thread_flag(TIF_NEED_FPU_LOAD);
- __fpu_invalidate_fpregs_state(fpu);
-
if ((unsigned long)buf_fx % 64)
fx_only = 1;
- /*
- * For 32-bit frames with fxstate, copy the fxstate so it can be
- * reconstructed later.
- */
- if (ia32_fxstate) {
- ret = __copy_from_user(&env, buf, sizeof(env));
- if (ret)
- goto out;
- envp = &env;
- } else {
+
+ if (!ia32_fxstate) {
/*
* Attempt to restore the FPU registers directly from user
* memory. For that to succeed, the user access cannot cause
@@ -349,38 +339,102 @@
*/
fpregs_lock();
pagefault_disable();
- ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
+ ret = copy_user_to_fpregs_zeroing(buf_fx, user_xfeatures, fx_only);
pagefault_enable();
if (!ret) {
+
+ /*
+ * Restore supervisor states: previous context switch
+ * etc has done XSAVES and saved the supervisor states
+ * in the kernel buffer from which they can be restored
+ * now.
+ *
+ * We cannot do a single XRSTORS here - which would
+ * be nice - because the rest of the FPU registers are
+ * being restored from a user buffer directly. The
+ * single XRSTORS happens below, when the user buffer
+ * has been copied to the kernel one.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD) &&
+ xfeatures_mask_supervisor())
+ copy_kernel_to_xregs(&fpu->state.xsave,
+ xfeatures_mask_supervisor());
fpregs_mark_activate();
fpregs_unlock();
return 0;
}
- fpregs_deactivate(fpu);
+
+ /*
+ * The above did an FPU restore operation, restricted to
+ * the user portion of the registers, and failed, but the
+ * microcode might have modified the FPU registers
+ * nevertheless.
+ *
+ * If the FPU registers do not belong to current, then
+ * invalidate the FPU register state otherwise the task might
+ * preempt current and return to user space with corrupted
+ * FPU registers.
+ *
+ * In case current owns the FPU registers then no further
+ * action is required. The fixup below will handle it
+ * correctly.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ __cpu_invalidate_fpregs_state();
+
fpregs_unlock();
+ } else {
+ /*
+ * For 32-bit frames with fxstate, copy the fxstate so it can
+ * be reconstructed later.
+ */
+ ret = __copy_from_user(&env, buf, sizeof(env));
+ if (ret)
+ goto out;
+ envp = &env;
}
+ /*
+ * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
+ * not modified on context switch and that the xstate is considered
+ * to be loaded again on return to userland (overriding last_cpu avoids
+ * the optimisation).
+ */
+ fpregs_lock();
+
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+
+ /*
+ * Supervisor states are not modified by user space input. Save
+ * current supervisor states first and invalidate the FPU regs.
+ */
+ if (xfeatures_mask_supervisor())
+ copy_supervisor_to_kernel(&fpu->state.xsave);
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ }
+ __fpu_invalidate_fpregs_state(fpu);
+ fpregs_unlock();
if (use_xsave() && !fx_only) {
- u64 init_bv = xfeatures_mask & ~xfeatures;
+ u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
- if (using_compacted_format()) {
- ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
- } else {
- ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
- if (!ret && state_size > offsetof(struct xregs_state, header))
- ret = validate_xstate_header(&fpu->state.xsave.header);
- }
+ ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
if (ret)
goto out;
- sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+ sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+ fx_only);
fpregs_lock();
if (unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
+
+ /*
+ * Restore previously saved supervisor xstates along with
+ * copied-in user xstates.
+ */
+ ret = copy_kernel_to_xregs_err(&fpu->state.xsave,
+ user_xfeatures | xfeatures_mask_supervisor());
} else if (use_fxsr()) {
ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
@@ -389,11 +443,14 @@
goto out;
}
- sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+ sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+ fx_only);
fpregs_lock();
if (use_xsave()) {
- u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
+ u64 init_bv;
+
+ init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
}
@@ -414,7 +471,7 @@
out:
if (ret)
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
return ret;
}
@@ -469,7 +526,7 @@
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
- fx_sw_reserved.xfeatures = xfeatures_mask;
+ fx_sw_reserved.xfeatures = xfeatures_mask_user();
fx_sw_reserved.xstate_size = fpu_user_xstate_size;
if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 046782d..80836b9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -37,6 +37,7 @@
"AVX-512 ZMM_Hi256" ,
"Processor Trace (unused)" ,
"Protection Keys User registers",
+ "PASID state",
"unknown xstate feature" ,
};
@@ -51,16 +52,19 @@
X86_FEATURE_AVX512F,
X86_FEATURE_INTEL_PT,
X86_FEATURE_PKU,
+ X86_FEATURE_ENQCMD,
};
/*
- * Mask of xstate features supported by the CPU and the kernel:
+ * This represents the full set of bits that should ever be set in a kernel
+ * XSAVE buffer, both supervisor and user xstates.
*/
-u64 xfeatures_mask __read_mostly;
+u64 xfeatures_mask_all __read_mostly;
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
+static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
* The XSAVE area of kernel can be in standard or compacted format;
@@ -76,7 +80,7 @@
*/
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
- u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
+ u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all;
if (unlikely(feature_name)) {
long xfeature_idx, max_idx;
@@ -107,25 +111,17 @@
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
-static int xfeature_is_supervisor(int xfeature_nr)
+static bool xfeature_is_supervisor(int xfeature_nr)
{
/*
- * We currently do not support supervisor states, but if
- * we did, we could find out like this.
- *
- * SDM says: If state component 'i' is a user state component,
- * ECX[0] return 0; if state component i is a supervisor
- * state component, ECX[0] returns 1.
+ * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
+ * returns ECX[0] set to (1) for a supervisor state, and cleared (0)
+ * for a user state.
*/
u32 eax, ebx, ecx, edx;
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
- return !!(ecx & 1);
-}
-
-static int xfeature_is_user(int xfeature_nr)
-{
- return !xfeature_is_supervisor(xfeature_nr);
+ return ecx & 1;
}
/*
@@ -158,7 +154,7 @@
* None of the feature bits are in init state. So nothing else
* to do for us, as the memory layout is up to date.
*/
- if ((xfeatures & xfeatures_mask) == xfeatures_mask)
+ if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all)
return;
/*
@@ -185,7 +181,7 @@
* in a special way already:
*/
feature_bit = 0x2;
- xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
+ xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2;
/*
* Update all the remaining memory layouts according to their
@@ -213,30 +209,41 @@
*/
void fpu__init_cpu_xstate(void)
{
- if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
+ u64 unsup_bits;
+
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
return;
/*
- * Make it clear that XSAVES supervisor states are not yet
- * implemented should anyone expect it to work by changing
- * bits in XFEATURE_MASK_* macros and XCR0.
+ * Unsupported supervisor xstates should not be found in
+ * the xfeatures mask.
*/
- WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
- "x86/fpu: XSAVES supervisor states are not yet implemented.\n");
+ unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
+ WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n",
+ unsup_bits);
- xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
+ xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
cr4_set_bits(X86_CR4_OSXSAVE);
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+
+ /*
+ * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
+ * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
+ * states can be set here.
+ */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * MSR_IA32_XSS sets supervisor states managed by XSAVES.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ xfeatures_mask_dynamic());
+ }
}
-/*
- * Note that in the future we will likely need a pair of
- * functions here: one for user xstates and the other for
- * system xstates. For now, they are the same.
- */
-static int xfeature_enabled(enum xfeature xfeature)
+static bool xfeature_enabled(enum xfeature xfeature)
{
- return !!(xfeatures_mask & (1UL << xfeature));
+ return xfeatures_mask_all & BIT_ULL(xfeature);
}
/*
@@ -254,10 +261,13 @@
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_offsets[0] = 0;
- xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space);
- xstate_offsets[1] = xstate_sizes[0];
- xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space);
+ xstate_offsets[XFEATURE_FP] = 0;
+ xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
+ xmm_space);
+
+ xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
+ xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
+ xmm_space);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (!xfeature_enabled(i))
@@ -265,21 +275,25 @@
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
- /*
- * If an xfeature is supervisor state, the offset
- * in EBX is invalid. We leave it to -1.
- */
- if (xfeature_is_user(i))
- xstate_offsets[i] = ebx;
-
xstate_sizes[i] = eax;
+
/*
- * In our xstate size checks, we assume that the
- * highest-numbered xstate feature has the
- * highest offset in the buffer. Ensure it does.
+ * If an xfeature is supervisor state, the offset in EBX is
+ * invalid, leave it to -1.
+ */
+ if (xfeature_is_supervisor(i))
+ continue;
+
+ xstate_offsets[i] = ebx;
+
+ /*
+ * In our xstate size checks, we assume that the highest-numbered
+ * xstate feature has the highest offset in the buffer. Ensure
+ * it does.
*/
WARN_ONCE(last_good_offset > xstate_offsets[i],
- "x86/fpu: misordered xstate at %d\n", last_good_offset);
+ "x86/fpu: misordered xstate at %d\n", last_good_offset);
+
last_good_offset = xstate_offsets[i];
}
}
@@ -306,6 +320,7 @@
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
print_xstate_feature(XFEATURE_MASK_PKRU);
+ print_xstate_feature(XFEATURE_MASK_PASID);
}
/*
@@ -326,6 +341,13 @@
u32 eax, ebx, ecx, edx;
CHECK_XFEATURE(xfeature_nr);
+
+ if (!xfeature_enabled(xfeature_nr)) {
+ WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
+ xfeature_nr);
+ return 0;
+ }
+
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
/*
* The value returned by ECX[1] indicates the alignment
@@ -338,11 +360,11 @@
/*
* This function sets up offsets and sizes of all extended states in
* xsave area. This supports both standard format and compacted format
- * of the xsave aread.
+ * of the xsave area.
*/
-static void __init setup_xstate_comp(void)
+static void __init setup_xstate_comp_offsets(void)
{
- unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
+ unsigned int next_offset;
int i;
/*
@@ -350,36 +372,56 @@
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_comp_offsets[0] = 0;
- xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
+ xstate_comp_offsets[XFEATURE_FP] = 0;
+ xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
+ xmm_space);
if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i)) {
+ if (xfeature_enabled(i))
xstate_comp_offsets[i] = xstate_offsets[i];
- xstate_comp_sizes[i] = xstate_sizes[i];
- }
}
return;
}
- xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
- FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i))
- xstate_comp_sizes[i] = xstate_sizes[i];
- else
- xstate_comp_sizes[i] = 0;
+ if (!xfeature_enabled(i))
+ continue;
- if (i > FIRST_EXTENDED_XFEATURE) {
- xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
- + xstate_comp_sizes[i-1];
+ if (xfeature_is_aligned(i))
+ next_offset = ALIGN(next_offset, 64);
- if (xfeature_is_aligned(i))
- xstate_comp_offsets[i] =
- ALIGN(xstate_comp_offsets[i], 64);
- }
+ xstate_comp_offsets[i] = next_offset;
+ next_offset += xstate_sizes[i];
+ }
+}
+
+/*
+ * Setup offsets of a supervisor-state-only XSAVES buffer:
+ *
+ * The offsets stored in xstate_comp_offsets[] only work for one specific
+ * value of the Requested Feature BitMap (RFBM). In cases where a different
+ * RFBM value is used, a different set of offsets is required. This set of
+ * offsets is for when RFBM=xfeatures_mask_supervisor().
+ */
+static void __init setup_supervisor_only_offsets(void)
+{
+ unsigned int next_offset;
+ int i;
+
+ next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
+ continue;
+
+ if (xfeature_is_aligned(i))
+ next_offset = ALIGN(next_offset, 64);
+
+ xstate_supervisor_only_offsets[i] = next_offset;
+ next_offset += xstate_sizes[i];
}
}
@@ -414,7 +456,8 @@
XFEATURE_MASK_Hi16_ZMM | \
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+ XFEATURE_MASK_BNDCSR | \
+ XFEATURE_MASK_PASID)
/*
* setup the xstate image representing the init state
@@ -423,7 +466,9 @@
{
static int on_boot_cpu __initdata = 1;
- BUILD_BUG_ON(XCNTXT_MASK != XFEATURES_INIT_FPSTATE_HANDLED);
+ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
+ XFEATURES_INIT_FPSTATE_HANDLED);
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -435,7 +480,8 @@
print_xstate_features();
if (boot_cpu_has(X86_FEATURE_XSAVES))
- init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
+ init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+ xfeatures_mask_all;
/*
* Init all the features state with header.xfeatures being 0x0
@@ -470,7 +516,7 @@
* format. Checking a supervisor state's uncompacted offset is
* an error.
*/
- if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) {
+ if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
return -1;
}
@@ -480,7 +526,7 @@
return ebx;
}
-static int xfeature_size(int xfeature_nr)
+int xfeature_size(int xfeature_nr)
{
u32 eax, ebx, ecx, edx;
@@ -504,10 +550,10 @@
}
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_xstate_header(const struct xstate_header *hdr)
+int validate_user_xstate_header(const struct xstate_header *hdr)
{
/* No unknown or supervisor features may be set */
- if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+ if (hdr->xfeatures & ~xfeatures_mask_user())
return -EINVAL;
/* Userspace must use the uncompacted format */
@@ -584,6 +630,7 @@
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
+ XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
/*
* Make *SURE* to add any feature numbers in below if
@@ -592,7 +639,8 @@
*/
if ((nr < XFEATURE_YMM) ||
(nr >= XFEATURE_MAX) ||
- (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
+ (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
+ ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) {
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
XSTATE_WARN_ON(1);
}
@@ -602,6 +650,10 @@
* This essentially double-checks what the cpu told us about
* how large the XSAVE buffer needs to be. We are recalculating
* it to be safe.
+ *
+ * Dynamic XSAVE features allocate their own buffers and are not
+ * covered by these checks. Only the size of the buffer for task->fpu
+ * is checked here.
*/
static void do_extra_xstate_size_checks(void)
{
@@ -642,15 +694,12 @@
/*
- * Get total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0 | IA32_XSS.
*
* Note the SDM's wording here. "sub-function 0" only enumerates
* the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too.
- *
- * Note that we do not currently set any bits on IA32_XSS so
- * 'XCR0 | IA32_XSS == XCR0' for now.
*/
static unsigned int __init get_xsaves_size(void)
{
@@ -667,6 +716,33 @@
return ebx;
}
+/*
+ * Get the total size of the enabled xstates without the dynamic supervisor
+ * features.
+ */
+static unsigned int __init get_xsaves_size_no_dynamic(void)
+{
+ u64 mask = xfeatures_mask_dynamic();
+ unsigned int size;
+
+ if (!mask)
+ return get_xsaves_size();
+
+ /* Disable dynamic features. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+
+ /*
+ * Ask the hardware what size is required of the buffer.
+ * This is the size required for the task->fpu buffer.
+ */
+ size = get_xsaves_size();
+
+ /* Re-enable dynamic features so XSAVES will work on them again. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+
+ return size;
+}
+
static unsigned int __init get_xsave_size(void)
{
unsigned int eax, ebx, ecx, edx;
@@ -704,7 +780,7 @@
xsave_size = get_xsave_size();
if (boot_cpu_has(X86_FEATURE_XSAVES))
- possible_xstate_size = get_xsaves_size();
+ possible_xstate_size = get_xsaves_size_no_dynamic();
else
possible_xstate_size = xsave_size;
@@ -732,7 +808,7 @@
*/
static void fpu__init_disable_system_xstate(void)
{
- xfeatures_mask = 0;
+ xfeatures_mask_all = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
@@ -767,16 +843,26 @@
return;
}
+ /*
+ * Find user xstates supported by the processor.
+ */
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xfeatures_mask = eax + ((u64)edx << 32);
+ xfeatures_mask_all = eax + ((u64)edx << 32);
- if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
+ /*
+ * Find supervisor xstates supported by the processor.
+ */
+ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ xfeatures_mask_all |= ecx + ((u64)edx << 32);
+
+ if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
/*
* This indicates that something really unexpected happened
* with the enumeration. Disable XSAVE and try to continue
* booting without it. This is too early to BUG().
*/
- pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
+ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
+ xfeatures_mask_all);
goto out_disable;
}
@@ -785,10 +871,10 @@
*/
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
if (!boot_cpu_has(xsave_cpuid_features[i]))
- xfeatures_mask &= ~BIT(i);
+ xfeatures_mask_all &= ~BIT_ULL(i);
}
- xfeatures_mask &= fpu__get_supported_xfeatures_mask();
+ xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
@@ -800,15 +886,16 @@
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
*/
- update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);
+ update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user());
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
- setup_xstate_comp();
+ setup_xstate_comp_offsets();
+ setup_supervisor_only_offsets();
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
- xfeatures_mask,
+ xfeatures_mask_all,
fpu_kernel_xstate_size,
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
return;
@@ -827,7 +914,16 @@
* Restore XCR0 on xsave capable CPUs:
*/
if (boot_cpu_has(X86_FEATURE_XSAVE))
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * Restore IA32_XSS. The same CPUID bit enumerates support
+ * of XSAVES and MSR_IA32_XSS.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ xfeatures_mask_dynamic());
+ }
}
/*
@@ -872,10 +968,9 @@
/*
* We should not ever be requesting features that we
- * have not enabled. Remember that pcntxt_mask is
- * what we write to the XCR0 register.
+ * have not enabled.
*/
- WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
+ WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
"get of unsupported state");
/*
* This assumes the last 'xsave*' instruction to
@@ -989,32 +1084,10 @@
return true;
}
-static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count)
+static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+ void *init_xstate, unsigned int size)
{
- if (*pos < to) {
- unsigned size = to - *pos;
-
- if (size > *count)
- size = *count;
- memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size);
- *kbuf += size;
- *pos += size;
- *count -= size;
- }
-}
-
-static void copy_part(unsigned offset, unsigned size, void *from,
- void **kbuf, unsigned *pos, unsigned *count)
-{
- fill_gap(offset, kbuf, pos, count);
- if (size > *count)
- size = *count;
- if (size) {
- memcpy(*kbuf, from, size);
- *kbuf += size;
- *pos += size;
- *count -= size;
- }
+ membuf_write(to, from_xstate ? xstate : init_xstate, size);
}
/*
@@ -1024,154 +1097,83 @@
* It supports partial copy but pos always starts from zero. This is called
* from xstateregs_get() and there we check the CPU has XSAVES.
*/
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave)
{
+ const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+ struct xregs_state *xinit = &init_fpstate.xsave;
struct xstate_header header;
- const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr);
- unsigned count = size_total;
+ unsigned int zerofrom;
int i;
/*
- * Currently copy_regset_to_user() starts from pos 0:
- */
- if (unlikely(offset_start != 0))
- return -EFAULT;
-
- /*
* The destination is a ptrace buffer; we put in only user xstates:
*/
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+ header.xfeatures &= xfeatures_mask_user();
- if (header.xfeatures & XFEATURE_MASK_FP)
- copy_part(0, off_mxcsr,
- &xsave->i387, &kbuf, &offset_start, &count);
- if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM))
- copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE,
- &xsave->i387.mxcsr, &kbuf, &offset_start, &count);
- if (header.xfeatures & XFEATURE_MASK_FP)
- copy_part(offsetof(struct fxregs_state, st_space), 128,
- &xsave->i387.st_space, &kbuf, &offset_start, &count);
- if (header.xfeatures & XFEATURE_MASK_SSE)
- copy_part(xstate_offsets[XFEATURE_SSE], 256,
- &xsave->i387.xmm_space, &kbuf, &offset_start, &count);
- /*
- * Fill xsave->i387.sw_reserved value for ptrace frame:
- */
- copy_part(offsetof(struct fxregs_state, sw_reserved), 48,
- xstate_fx_sw_bytes, &kbuf, &offset_start, &count);
- /*
- * Copy xregs_state->header:
- */
- copy_part(offsetof(struct xregs_state, header), sizeof(header),
- &header, &kbuf, &offset_start, &count);
+ /* Copy FP state up to MXCSR */
+ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
+ &xinit->i387, off_mxcsr);
+
+ /* Copy MXCSR when SSE or YMM are set in the feature mask */
+ copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
+ &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
+ MXCSR_AND_FLAGS_SIZE);
+
+ /* Copy the remaining FP state */
+ copy_feature(header.xfeatures & XFEATURE_MASK_FP,
+ &to, &xsave->i387.st_space, &xinit->i387.st_space,
+ sizeof(xsave->i387.st_space));
+
+ /* Copy the SSE state - shared with YMM, but independently managed */
+ copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
+ &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
+ sizeof(xsave->i387.xmm_space));
+
+ /* Zero the padding area */
+ membuf_zero(&to, sizeof(xsave->i387.padding));
+
+ /* Copy xsave->i387.sw_reserved */
+ membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
+
+ /* Copy the user space relevant state of @xsave->header */
+ membuf_write(&to, &header, sizeof(header));
+
+ zerofrom = offsetof(struct xregs_state, extended_state_area);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
/*
- * Copy only in-use xstates:
+ * The ptrace buffer is in non-compacted XSAVE format.
+ * In non-compacted format disabled features still occupy
+ * state space, but there is no state to copy from in the
+ * compacted init_fpstate. The gap tracking will zero this
+ * later.
*/
- if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, i);
+ if (!(xfeatures_mask_user() & BIT_ULL(i)))
+ continue;
- copy_part(xstate_offsets[i], xstate_sizes[i],
- src, &kbuf, &offset_start, &count);
- }
-
- }
- fill_gap(size_total, &kbuf, &offset_start, &count);
-
- return 0;
-}
-
-static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
-{
- if (!size)
- return 0;
-
- if (offset < size_total) {
- unsigned int copy = min(size, size_total - offset);
-
- if (__copy_to_user(ubuf + offset, data, copy))
- return -EFAULT;
- }
- return 0;
-}
-
-/*
- * Convert from kernel XSAVES compacted format to standard format and copy
- * to a user-space buffer. It supports partial copy but pos always starts from
- * zero. This is called from xstateregs_get() and there we check the CPU
- * has XSAVES.
- */
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
-{
- unsigned int offset, size;
- int ret, i;
- struct xstate_header header;
-
- /*
- * Currently copy_regset_to_user() starts from pos 0:
- */
- if (unlikely(offset_start != 0))
- return -EFAULT;
-
- /*
- * The destination is a ptrace buffer; we put in only user xstates:
- */
- memset(&header, 0, sizeof(header));
- header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
-
- /*
- * Copy xregs_state->header:
- */
- offset = offsetof(struct xregs_state, header);
- size = sizeof(header);
-
- ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
- if (ret)
- return ret;
-
- for (i = 0; i < XFEATURE_MAX; i++) {
/*
- * Copy only in-use xstates:
+ * If there was a feature or alignment gap, zero the space
+ * in the destination buffer.
*/
- if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, i);
+ if (zerofrom < xstate_offsets[i])
+ membuf_zero(&to, xstate_offsets[i] - zerofrom);
- offset = xstate_offsets[i];
- size = xstate_sizes[i];
+ copy_feature(header.xfeatures & BIT_ULL(i), &to,
+ __raw_xsave_addr(xsave, i),
+ __raw_xsave_addr(xinit, i),
+ xstate_sizes[i]);
- /* The next component has to fit fully into the output buffer: */
- if (offset + size > size_total)
- break;
-
- ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
- if (ret)
- return ret;
- }
-
+ /*
+ * Keep track of the last copied state in the non-compacted
+ * target buffer for gap zeroing.
+ */
+ zerofrom = xstate_offsets[i] + xstate_sizes[i];
}
- if (xfeatures_mxcsr_quirk(header.xfeatures)) {
- offset = offsetof(struct fxregs_state, mxcsr);
- size = MXCSR_AND_FLAGS_SIZE;
- __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
- }
-
- /*
- * Fill xsave->i387.sw_reserved value for ptrace frame:
- */
- offset = offsetof(struct fxregs_state, sw_reserved);
- size = sizeof(xstate_fx_sw_bytes);
-
- ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
- if (ret)
- return ret;
-
- return 0;
+ if (to.left)
+ membuf_zero(&to, to.left);
}
/*
@@ -1189,7 +1191,7 @@
memcpy(&hdr, kbuf + offset, size);
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
@@ -1215,7 +1217,7 @@
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1243,7 +1245,7 @@
if (__copy_from_user(&hdr, ubuf + offset, size))
return -EFAULT;
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
@@ -1271,7 +1273,7 @@
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1281,6 +1283,133 @@
return 0;
}
+/*
+ * Save only supervisor states to the kernel buffer. This blows away all
+ * old states, and is intended to be used only in __fpu__restore_sig(), where
+ * user states are restored from the user buffer.
+ */
+void copy_supervisor_to_kernel(struct xregs_state *xstate)
+{
+ struct xstate_header *header;
+ u64 max_bit, min_bit;
+ u32 lmask, hmask;
+ int err, i;
+
+ if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (!xfeatures_mask_supervisor())
+ return;
+
+ max_bit = __fls(xfeatures_mask_supervisor());
+ min_bit = __ffs(xfeatures_mask_supervisor());
+
+ lmask = xfeatures_mask_supervisor();
+ hmask = xfeatures_mask_supervisor() >> 32;
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+ /* We should never fault when copying to a kernel buffer: */
+ if (WARN_ON_FPU(err))
+ return;
+
+ /*
+ * At this point, the buffer has only supervisor states and must be
+ * converted back to normal kernel format.
+ */
+ header = &xstate->header;
+ header->xcomp_bv |= xfeatures_mask_all;
+
+ /*
+ * This only moves states up in the buffer. Start with
+ * the last state and move backwards so that states are
+ * not overwritten until after they are moved. Note:
+ * memmove() allows overlapping src/dst buffers.
+ */
+ for (i = max_bit; i >= min_bit; i--) {
+ u8 *xbuf = (u8 *)xstate;
+
+ if (!((header->xfeatures >> i) & 1))
+ continue;
+
+ /* Move xfeature 'i' into its normal location */
+ memmove(xbuf + xstate_comp_offsets[i],
+ xbuf + xstate_supervisor_only_offsets[i],
+ xstate_sizes[i]);
+ }
+}
+
+/**
+ * copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to
+ * an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represent the dynamic supervisor features saved into the xsave area
+ *
+ * Only the dynamic supervisor states sets in the mask are saved into the xsave
+ * area (See the comment in XFEATURE_MASK_DYNAMIC for the details of dynamic
+ * supervisor feature). Besides the dynamic supervisor states, the legacy
+ * region and XSAVE header are also saved into the xsave area. The supervisor
+ * features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved.
+ *
+ * The xsave area must be 64-bytes aligned.
+ */
+void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask)
+{
+ u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+ u32 lmask, hmask;
+ int err;
+
+ if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (WARN_ON_FPU(!dynamic_mask))
+ return;
+
+ lmask = dynamic_mask;
+ hmask = dynamic_mask >> 32;
+
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+ /* Should never fault when copying to a kernel buffer */
+ WARN_ON_FPU(err);
+}
+
+/**
+ * copy_kernel_to_dynamic_supervisor() - Restore dynamic supervisor states from
+ * an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represent the dynamic supervisor features restored from the xsave area
+ *
+ * Only the dynamic supervisor states sets in the mask are restored from the
+ * xsave area (See the comment in XFEATURE_MASK_DYNAMIC for the details of
+ * dynamic supervisor feature). Besides the dynamic supervisor states, the
+ * legacy region and XSAVE header are also restored from the xsave area. The
+ * supervisor features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored.
+ *
+ * The xsave area must be 64-bytes aligned.
+ */
+void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask)
+{
+ u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+ u32 lmask, hmask;
+ int err;
+
+ if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (WARN_ON_FPU(!dynamic_mask))
+ return;
+
+ lmask = dynamic_mask;
+ hmask = dynamic_mask >> 32;
+
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+
+ /* Should never fault when copying from a kernel buffer */
+ WARN_ON_FPU(err);
+}
+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
* Report the amount of time elapsed in millisecond since last AVX512