Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore
index 8df89f0..8ae0f93 100644
--- a/arch/x86/lib/.gitignore
+++ b/arch/x86/lib/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
inat-tables.c
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 5246db4..bad4dee 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -6,16 +6,25 @@
# Produces uninteresting flaky coverage.
KCOV_INSTRUMENT_delay.o := n
+# KCSAN uses udelay for introducing watchpoint delay; avoid recursion.
+KCSAN_SANITIZE_delay.o := n
+ifdef CONFIG_KCSAN
+# In case KCSAN+lockdep+ftrace are enabled, disable ftrace for delay.o to avoid
+# lockdep -> [other libs] -> KCSAN -> udelay -> ftrace -> lockdep recursion.
+CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE)
+endif
+
# Early boot use of cmdline; don't instrument it
ifdef CONFIG_AMD_MEM_ENCRYPT
KCOV_INSTRUMENT_cmdline.o := n
KASAN_SANITIZE_cmdline.o := n
+KCSAN_SANITIZE_cmdline.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_cmdline.o = -pg
endif
-CFLAGS_cmdline.o := $(call cc-option, -fno-stack-protector)
+CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
endif
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
@@ -35,6 +44,7 @@
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
+lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e0788ba..3b65441 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -20,10 +20,10 @@
#define BEGIN(op) \
.macro endp; \
-ENDPROC(atomic64_##op##_386); \
+SYM_FUNC_END(atomic64_##op##_386); \
.purgem endp; \
.endm; \
-ENTRY(atomic64_##op##_386); \
+SYM_FUNC_START(atomic64_##op##_386); \
LOCK v;
#define ENDP endp
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 843d978..1c5c81c 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -16,12 +16,12 @@
cmpxchg8b (\reg)
.endm
-ENTRY(atomic64_read_cx8)
+SYM_FUNC_START(atomic64_read_cx8)
read64 %ecx
ret
-ENDPROC(atomic64_read_cx8)
+SYM_FUNC_END(atomic64_read_cx8)
-ENTRY(atomic64_set_cx8)
+SYM_FUNC_START(atomic64_set_cx8)
1:
/* we don't need LOCK_PREFIX since aligned 64-bit writes
* are atomic on 586 and newer */
@@ -29,19 +29,19 @@
jne 1b
ret
-ENDPROC(atomic64_set_cx8)
+SYM_FUNC_END(atomic64_set_cx8)
-ENTRY(atomic64_xchg_cx8)
+SYM_FUNC_START(atomic64_xchg_cx8)
1:
LOCK_PREFIX
cmpxchg8b (%esi)
jne 1b
ret
-ENDPROC(atomic64_xchg_cx8)
+SYM_FUNC_END(atomic64_xchg_cx8)
.macro addsub_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+SYM_FUNC_START(atomic64_\func\()_return_cx8)
pushl %ebp
pushl %ebx
pushl %esi
@@ -69,14 +69,14 @@
popl %ebx
popl %ebp
ret
-ENDPROC(atomic64_\func\()_return_cx8)
+SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
addsub_return add add adc
addsub_return sub sub sbb
.macro incdec_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+SYM_FUNC_START(atomic64_\func\()_return_cx8)
pushl %ebx
read64 %esi
@@ -94,13 +94,13 @@
movl %ecx, %edx
popl %ebx
ret
-ENDPROC(atomic64_\func\()_return_cx8)
+SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
incdec_return inc add adc
incdec_return dec sub sbb
-ENTRY(atomic64_dec_if_positive_cx8)
+SYM_FUNC_START(atomic64_dec_if_positive_cx8)
pushl %ebx
read64 %esi
@@ -119,9 +119,9 @@
movl %ecx, %edx
popl %ebx
ret
-ENDPROC(atomic64_dec_if_positive_cx8)
+SYM_FUNC_END(atomic64_dec_if_positive_cx8)
-ENTRY(atomic64_add_unless_cx8)
+SYM_FUNC_START(atomic64_add_unless_cx8)
pushl %ebp
pushl %ebx
/* these just push these two parameters on the stack */
@@ -155,9 +155,9 @@
jne 2b
xorl %eax, %eax
jmp 3b
-ENDPROC(atomic64_add_unless_cx8)
+SYM_FUNC_END(atomic64_add_unless_cx8)
-ENTRY(atomic64_inc_not_zero_cx8)
+SYM_FUNC_START(atomic64_inc_not_zero_cx8)
pushl %ebx
read64 %esi
@@ -177,4 +177,4 @@
3:
popl %ebx
ret
-ENDPROC(atomic64_inc_not_zero_cx8)
+SYM_FUNC_END(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4df90c9..4304320 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -46,7 +46,7 @@
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
-ENTRY(csum_partial)
+SYM_FUNC_START(csum_partial)
pushl %esi
pushl %ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
@@ -128,13 +128,13 @@
popl %ebx
popl %esi
ret
-ENDPROC(csum_partial)
+SYM_FUNC_END(csum_partial)
#else
/* Version for PentiumII/PPro */
-ENTRY(csum_partial)
+SYM_FUNC_START(csum_partial)
pushl %esi
pushl %ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
@@ -153,7 +153,7 @@
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -246,59 +246,48 @@
popl %ebx
popl %esi
ret
-ENDPROC(csum_partial)
+SYM_FUNC_END(csum_partial)
#endif
EXPORT_SYMBOL(csum_partial)
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
- int len, int sum, int *src_err_ptr, int *dst_err_ptr)
+ int len)
*/
/*
* Copy from ds while checksumming, otherwise like csum_partial
- *
- * The macros SRC and DST specify the type of access for the instruction.
- * thus we can call a custom exception handler for all access types.
- *
- * FIXME: could someone double-check whether I haven't mixed up some SRC and
- * DST definitions? It's damn hard to trigger all cases. I hope I got
- * them all but there's no guarantee.
*/
-#define SRC(y...) \
+#define EXC(y...) \
9999: y; \
_ASM_EXTABLE_UA(9999b, 6001f)
-#define DST(y...) \
- 9999: y; \
- _ASM_EXTABLE_UA(9999b, 6002f)
-
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
#define ARGBASE 16
#define FP 12
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
subl $4,%esp
pushl %edi
pushl %esi
pushl %ebx
- movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
movl ARGBASE+8(%esp),%edi # dst
+ movl $-1, %eax # sum
testl $2, %edi # Check alignment.
jz 2f # Jump if alignment is ok.
subl $2, %ecx # Alignment uses up two bytes.
jae 1f # Jump if we had at least two bytes.
addl $2, %ecx # ecx was < 2. Deal with it.
jmp 4f
-SRC(1: movw (%esi), %bx )
+EXC(1: movw (%esi), %bx )
addl $2, %esi
-DST( movw %bx, (%edi) )
+EXC( movw %bx, (%edi) )
addl $2, %edi
addw %bx, %ax
adcl $0, %eax
@@ -306,34 +295,34 @@
movl %ecx, FP(%esp)
shrl $5, %ecx
jz 2f
- testl %esi, %esi
-SRC(1: movl (%esi), %ebx )
-SRC( movl 4(%esi), %edx )
+ testl %esi, %esi # what's wrong with clc?
+EXC(1: movl (%esi), %ebx )
+EXC( movl 4(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, (%edi) )
+EXC( movl %ebx, (%edi) )
adcl %edx, %eax
-DST( movl %edx, 4(%edi) )
+EXC( movl %edx, 4(%edi) )
-SRC( movl 8(%esi), %ebx )
-SRC( movl 12(%esi), %edx )
+EXC( movl 8(%esi), %ebx )
+EXC( movl 12(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, 8(%edi) )
+EXC( movl %ebx, 8(%edi) )
adcl %edx, %eax
-DST( movl %edx, 12(%edi) )
+EXC( movl %edx, 12(%edi) )
-SRC( movl 16(%esi), %ebx )
-SRC( movl 20(%esi), %edx )
+EXC( movl 16(%esi), %ebx )
+EXC( movl 20(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, 16(%edi) )
+EXC( movl %ebx, 16(%edi) )
adcl %edx, %eax
-DST( movl %edx, 20(%edi) )
+EXC( movl %edx, 20(%edi) )
-SRC( movl 24(%esi), %ebx )
-SRC( movl 28(%esi), %edx )
+EXC( movl 24(%esi), %ebx )
+EXC( movl 28(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, 24(%edi) )
+EXC( movl %ebx, 24(%edi) )
adcl %edx, %eax
-DST( movl %edx, 28(%edi) )
+EXC( movl %edx, 28(%edi) )
lea 32(%esi), %esi
lea 32(%edi), %edi
@@ -345,9 +334,9 @@
andl $0x1c, %edx
je 4f
shrl $2, %edx # This clears CF
-SRC(3: movl (%esi), %ebx )
+EXC(3: movl (%esi), %ebx )
adcl %ebx, %eax
-DST( movl %ebx, (%edi) )
+EXC( movl %ebx, (%edi) )
lea 4(%esi), %esi
lea 4(%edi), %edi
dec %edx
@@ -357,39 +346,24 @@
jz 7f
cmpl $2, %ecx
jb 5f
-SRC( movw (%esi), %cx )
+EXC( movw (%esi), %cx )
leal 2(%esi), %esi
-DST( movw %cx, (%edi) )
+EXC( movw %cx, (%edi) )
leal 2(%edi), %edi
je 6f
shll $16,%ecx
-SRC(5: movb (%esi), %cl )
-DST( movb %cl, (%edi) )
+EXC(5: movb (%esi), %cl )
+EXC( movb %cl, (%edi) )
6: addl %ecx, %eax
adcl $0, %eax
7:
-5000:
# Exception handler:
.section .fixup, "ax"
6001:
- movl ARGBASE+20(%esp), %ebx # src_err_ptr
- movl $-EFAULT, (%ebx)
-
- # zero the complete destination - computing the rest
- # is too much work
- movl ARGBASE+8(%esp), %edi # dst
- movl ARGBASE+12(%esp), %ecx # len
- xorl %eax,%eax
- rep ; stosb
-
- jmp 5000b
-
-6002:
- movl ARGBASE+24(%esp), %ebx # dst_err_ptr
- movl $-EFAULT,(%ebx)
- jmp 5000b
+ xorl %eax, %eax
+ jmp 7b
.previous
@@ -398,32 +372,32 @@
popl %edi
popl %ecx # equivalent to addl $4,%esp
ret
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
#else
/* Version for PentiumII/PPro */
#define ROUND1(x) \
- SRC(movl x(%esi), %ebx ) ; \
+ EXC(movl x(%esi), %ebx ) ; \
addl %ebx, %eax ; \
- DST(movl %ebx, x(%edi) ) ;
+ EXC(movl %ebx, x(%edi) ) ;
#define ROUND(x) \
- SRC(movl x(%esi), %ebx ) ; \
+ EXC(movl x(%esi), %ebx ) ; \
adcl %ebx, %eax ; \
- DST(movl %ebx, x(%edi) ) ;
+ EXC(movl %ebx, x(%edi) ) ;
#define ARGBASE 12
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
pushl %ebx
pushl %edi
pushl %esi
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
- movl ARGBASE+16(%esp),%eax #sum
+ movl $-1, %eax #sum
# movl %ecx, %edx
movl %ecx, %ebx
movl %esi, %edx
@@ -436,10 +410,10 @@
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
1: addl $64,%esi
addl $64,%edi
- SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
+ EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl)
ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
@@ -453,29 +427,20 @@
jz 7f
cmpl $2, %edx
jb 5f
-SRC( movw (%esi), %dx )
+EXC( movw (%esi), %dx )
leal 2(%esi), %esi
-DST( movw %dx, (%edi) )
+EXC( movw %dx, (%edi) )
leal 2(%edi), %edi
je 6f
shll $16,%edx
5:
-SRC( movb (%esi), %dl )
-DST( movb %dl, (%edi) )
+EXC( movb (%esi), %dl )
+EXC( movb %dl, (%edi) )
6: addl %edx, %eax
adcl $0, %eax
7:
.section .fixup, "ax"
-6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr
- movl $-EFAULT, (%ebx)
- # zero the complete destination (computing the rest is too much work)
- movl ARGBASE+8(%esp),%edi # dst
- movl ARGBASE+12(%esp),%ecx # len
- xorl %eax,%eax
- rep; stosb
- jmp 7b
-6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr
- movl $-EFAULT, (%ebx)
+6001: xorl %eax, %eax
jmp 7b
.previous
@@ -483,7 +448,7 @@
popl %edi
popl %ebx
ret
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
#undef ROUND
#undef ROUND1
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 75a5a45..c4c7dd1 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -13,15 +13,15 @@
* Zero a page.
* %rdi - page
*/
-ENTRY(clear_page_rep)
+SYM_FUNC_START(clear_page_rep)
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
-ENDPROC(clear_page_rep)
+SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
-ENTRY(clear_page_orig)
+SYM_FUNC_START(clear_page_orig)
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -40,13 +40,13 @@
jnz .Lloop
nop
ret
-ENDPROC(clear_page_orig)
+SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
-ENTRY(clear_page_erms)
+SYM_FUNC_START(clear_page_erms)
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
-ENDPROC(clear_page_erms)
+SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 4f1719e..b6da093 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -58,7 +58,7 @@
state = st_wordcmp;
opptr = option;
wstart = pos;
- /* fall through */
+ fallthrough;
case st_wordcmp:
if (!*opptr) {
@@ -89,7 +89,7 @@
break;
}
state = st_wordskip;
- /* fall through */
+ fallthrough;
case st_wordskip:
if (!c)
@@ -151,7 +151,7 @@
state = st_wordcmp;
opptr = option;
- /* fall through */
+ fallthrough;
case st_wordcmp:
if ((c == '=') && !*opptr) {
@@ -172,7 +172,7 @@
break;
}
state = st_wordskip;
- /* fall through */
+ fallthrough;
case st_wordskip:
if (myisspace(c))
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index d631856..3542502 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -13,7 +13,7 @@
* %rcx : high 64 bits of new value
* %al : Operation successful
*/
-ENTRY(this_cpu_cmpxchg16b_emu)
+SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
#
# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
@@ -44,4 +44,4 @@
xor %al,%al
ret
-ENDPROC(this_cpu_cmpxchg16b_emu)
+SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 691d80e..ca01ed6 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -13,7 +13,7 @@
* %ebx : low 32 bits of new value
* %ecx : high 32 bits of new value
*/
-ENTRY(cmpxchg8b_emu)
+SYM_FUNC_START(cmpxchg8b_emu)
#
# Emulate 'cmpxchg8b (%esi)' on UP except we don't
@@ -42,5 +42,5 @@
popfl
ret
-ENDPROC(cmpxchg8b_emu)
+SYM_FUNC_END(cmpxchg8b_emu)
EXPORT_SYMBOL(cmpxchg8b_emu)
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
new file mode 100644
index 0000000..c13e8c9
--- /dev/null
+++ b/arch/x86/lib/copy_mc.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/mce.h>
+
+#ifdef CONFIG_X86_MCE
+/*
+ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
+ * implementation.
+ */
+static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
+
+void enable_copy_mc_fragile(void)
+{
+ static_branch_inc(&copy_mc_fragile_key);
+}
+#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
+
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point, or
+ * source exception point.
+ */
+__visible notrace unsigned long
+copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
+{
+ for (; len; --len, to++, from++)
+ if (copy_mc_fragile(to, from, 1))
+ break;
+ return len;
+}
+#else
+/*
+ * No point in doing careful copying, or consulting a static key when
+ * there is no #MC handler in the CONFIG_X86_MCE=n case.
+ */
+void enable_copy_mc_fragile(void)
+{
+}
+#define copy_mc_fragile_enabled (0)
+#endif
+
+unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
+
+/**
+ * copy_mc_to_kernel - memory copy that handles source exceptions
+ *
+ * @dst: destination address
+ * @src: source address
+ * @len: number of bytes to copy
+ *
+ * Call into the 'fragile' version on systems that benefit from avoiding
+ * corner case poison consumption scenarios, For example, accessing
+ * poison across 2 cachelines with a single instruction. Almost all
+ * other uses case can use copy_mc_enhanced_fast_string() for a fast
+ * recoverable copy, or fallback to plain memcpy.
+ *
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
+ */
+unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
+{
+ if (copy_mc_fragile_enabled)
+ return copy_mc_fragile(dst, src, len);
+ if (static_cpu_has(X86_FEATURE_ERMS))
+ return copy_mc_enhanced_fast_string(dst, src, len);
+ memcpy(dst, src, len);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
+
+unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
+{
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ __uaccess_begin();
+ ret = copy_mc_fragile(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ if (static_cpu_has(X86_FEATURE_ERMS)) {
+ __uaccess_begin();
+ ret = copy_mc_enhanced_fast_string(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ return copy_user_generic(dst, src, len);
+}
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
new file mode 100644
index 0000000..892d891
--- /dev/null
+++ b/arch/x86/lib/copy_mc_64.S
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/linkage.h>
+#include <asm/copy_mc_test.h>
+#include <asm/export.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_UML
+
+#ifdef CONFIG_X86_MCE
+COPY_MC_TEST_CTL
+
+/*
+ * copy_mc_fragile - copy memory with indication if an exception / fault happened
+ *
+ * The 'fragile' version is opted into by platform quirks and takes
+ * pains to avoid unrecoverable corner cases like 'fast-string'
+ * instruction sequences, and consuming poison across a cacheline
+ * boundary. The non-fragile version is equivalent to memcpy()
+ * regardless of CPU machine-check-recovery capability.
+ */
+SYM_FUNC_START(copy_mc_fragile)
+ cmpl $8, %edx
+ /* Less than 8 bytes? Go to byte copy loop */
+ jb .L_no_whole_words
+
+ /* Check for bad alignment of source */
+ testl $7, %esi
+ /* Already aligned */
+ jz .L_8byte_aligned
+
+ /* Copy one byte at a time until source is 8-byte aligned */
+ movl %esi, %ecx
+ andl $7, %ecx
+ subl $8, %ecx
+ negl %ecx
+ subl %ecx, %edx
+.L_read_leading_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_leading_bytes
+
+.L_8byte_aligned:
+ movl %edx, %ecx
+ andl $7, %edx
+ shrl $3, %ecx
+ jz .L_no_whole_words
+
+.L_read_words:
+ movq (%rsi), %r8
+ COPY_MC_TEST_SRC %rsi 8 .E_read_words
+ COPY_MC_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+ movq %r8, (%rdi)
+ addq $8, %rsi
+ addq $8, %rdi
+ decl %ecx
+ jnz .L_read_words
+
+ /* Any trailing bytes? */
+.L_no_whole_words:
+ andl %edx, %edx
+ jz .L_done_memcpy_trap
+
+ /* Copy trailing bytes */
+ movl %edx, %ecx
+.L_read_trailing_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_trailing_bytes
+
+ /* Copy successful. Return zero */
+.L_done_memcpy_trap:
+ xorl %eax, %eax
+.L_done:
+ ret
+SYM_FUNC_END(copy_mc_fragile)
+EXPORT_SYMBOL_GPL(copy_mc_fragile)
+
+ .section .fixup, "ax"
+ /*
+ * Return number of bytes not copied for any failure. Note that
+ * there is no "tail" handling since the source buffer is 8-byte
+ * aligned and poison is cacheline aligned.
+ */
+.E_read_words:
+ shll $3, %ecx
+.E_leading_bytes:
+ addl %edx, %ecx
+.E_trailing_bytes:
+ mov %ecx, %eax
+ jmp .L_done
+
+ /*
+ * For write fault handling, given the destination is unaligned,
+ * we handle faults on multi-byte writes with a byte-by-byte
+ * copy up to the write-protected page.
+ */
+.E_write_words:
+ shll $3, %ecx
+ addl %edx, %ecx
+ movl %ecx, %edx
+ jmp copy_mc_fragile_handle_tail
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE(.L_write_words, .E_write_words)
+ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+#endif /* CONFIG_X86_MCE */
+
+/*
+ * copy_mc_enhanced_fast_string - memory copy with exception handling
+ *
+ * Fast string copy + fault / exception handling. If the CPU does
+ * support machine check exception recovery, but does not support
+ * recovering from fast-string exceptions then this CPU needs to be
+ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
+ * machine check recovery support this version should be no slower than
+ * standard memcpy.
+ */
+SYM_FUNC_START(copy_mc_enhanced_fast_string)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+.L_copy:
+ rep movsb
+ /* Copy successful. Return zero */
+ xorl %eax, %eax
+ ret
+SYM_FUNC_END(copy_mc_enhanced_fast_string)
+
+ .section .fixup, "ax"
+.E_copy:
+ /*
+ * On fault %rcx is updated such that the copy instruction could
+ * optionally be restarted at the fault position, i.e. it
+ * contains 'bytes remaining'. A non-zero return indicates error
+ * to copy_mc_generic() users, or indicate short transfers to
+ * user-copy routines.
+ */
+ movq %rcx, %rax
+ ret
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_copy, .E_copy)
+#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index fd2d09a..2402d4c 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,15 +13,15 @@
* prefetch distance based on SMP/UP.
*/
ALIGN
-ENTRY(copy_page)
+SYM_FUNC_START(copy_page)
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
-ENDPROC(copy_page)
+SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)
-ENTRY(copy_page_regs)
+SYM_FUNC_START_LOCAL(copy_page_regs)
subq $2*8, %rsp
movq %rbx, (%rsp)
movq %r12, 1*8(%rsp)
@@ -86,4 +86,4 @@
movq 1*8(%rsp), %r12
addq $2*8, %rsp
ret
-ENDPROC(copy_page_regs)
+SYM_FUNC_END(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 86976b5..77b9b2a 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,6 +15,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
@@ -36,8 +37,8 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
+ _ASM_EXTABLE_CPY(100b, 103b)
+ _ASM_EXTABLE_CPY(101b, 103b)
.endm
/*
@@ -53,7 +54,7 @@
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_generic_unrolled)
+SYM_FUNC_START(copy_user_generic_unrolled)
ASM_STAC
cmpl $8,%edx
jb 20f /* less then 8 bytes, go to byte copy loop */
@@ -116,27 +117,27 @@
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
- _ASM_EXTABLE_UA(1b, 30b)
- _ASM_EXTABLE_UA(2b, 30b)
- _ASM_EXTABLE_UA(3b, 30b)
- _ASM_EXTABLE_UA(4b, 30b)
- _ASM_EXTABLE_UA(5b, 30b)
- _ASM_EXTABLE_UA(6b, 30b)
- _ASM_EXTABLE_UA(7b, 30b)
- _ASM_EXTABLE_UA(8b, 30b)
- _ASM_EXTABLE_UA(9b, 30b)
- _ASM_EXTABLE_UA(10b, 30b)
- _ASM_EXTABLE_UA(11b, 30b)
- _ASM_EXTABLE_UA(12b, 30b)
- _ASM_EXTABLE_UA(13b, 30b)
- _ASM_EXTABLE_UA(14b, 30b)
- _ASM_EXTABLE_UA(15b, 30b)
- _ASM_EXTABLE_UA(16b, 30b)
- _ASM_EXTABLE_UA(18b, 40b)
- _ASM_EXTABLE_UA(19b, 40b)
- _ASM_EXTABLE_UA(21b, 50b)
- _ASM_EXTABLE_UA(22b, 50b)
-ENDPROC(copy_user_generic_unrolled)
+ _ASM_EXTABLE_CPY(1b, 30b)
+ _ASM_EXTABLE_CPY(2b, 30b)
+ _ASM_EXTABLE_CPY(3b, 30b)
+ _ASM_EXTABLE_CPY(4b, 30b)
+ _ASM_EXTABLE_CPY(5b, 30b)
+ _ASM_EXTABLE_CPY(6b, 30b)
+ _ASM_EXTABLE_CPY(7b, 30b)
+ _ASM_EXTABLE_CPY(8b, 30b)
+ _ASM_EXTABLE_CPY(9b, 30b)
+ _ASM_EXTABLE_CPY(10b, 30b)
+ _ASM_EXTABLE_CPY(11b, 30b)
+ _ASM_EXTABLE_CPY(12b, 30b)
+ _ASM_EXTABLE_CPY(13b, 30b)
+ _ASM_EXTABLE_CPY(14b, 30b)
+ _ASM_EXTABLE_CPY(15b, 30b)
+ _ASM_EXTABLE_CPY(16b, 30b)
+ _ASM_EXTABLE_CPY(18b, 40b)
+ _ASM_EXTABLE_CPY(19b, 40b)
+ _ASM_EXTABLE_CPY(21b, 50b)
+ _ASM_EXTABLE_CPY(22b, 50b)
+SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
/* Some CPUs run faster using the string copy instructions.
@@ -157,7 +158,7 @@
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_generic_string)
+SYM_FUNC_START(copy_user_generic_string)
ASM_STAC
cmpl $8,%edx
jb 2f /* less than 8 bytes, go to byte copy loop */
@@ -180,9 +181,9 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 11b)
- _ASM_EXTABLE_UA(3b, 12b)
-ENDPROC(copy_user_generic_string)
+ _ASM_EXTABLE_CPY(1b, 11b)
+ _ASM_EXTABLE_CPY(3b, 12b)
+SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
/*
@@ -197,7 +198,7 @@
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_enhanced_fast_string)
+SYM_FUNC_START(copy_user_enhanced_fast_string)
ASM_STAC
cmpl $64,%edx
jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
@@ -213,14 +214,15 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 12b)
-ENDPROC(copy_user_enhanced_fast_string)
+ _ASM_EXTABLE_CPY(1b, 12b)
+SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
/*
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
+ * Don't try to copy the tail if machine check happened
*
* Input:
* rdi destination
@@ -230,16 +232,28 @@
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ALIGN;
-.Lcopy_user_handle_tail:
+SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
+ cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
+ je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
- _ASM_EXTABLE_UA(1b, 2b)
-END(.Lcopy_user_handle_tail)
+ /*
+ * Return zero to pretend that this copy succeeded. This
+ * is counter-intuitive, but needed to prevent the code
+ * in lib/iov_iter.c from retrying and running back into
+ * the poison cache line again. The machine check handler
+ * will ensure that a SIGBUS is sent to the task.
+ */
+3: xorl %eax,%eax
+ ASM_CLAC
+ ret
+
+ _ASM_EXTABLE_CPY(1b, 2b)
+SYM_CODE_END(.Lcopy_user_handle_tail)
/*
* copy_user_nocache - Uncached memory copy with exception handling
@@ -250,7 +264,7 @@
* - Require 8-byte alignment when size is 8 bytes or larger.
* - Require 4-byte alignment when size is 4 bytes.
*/
-ENTRY(__copy_user_nocache)
+SYM_FUNC_START(__copy_user_nocache)
ASM_STAC
/* If size is less than 8 bytes, go to 4-byte copy */
@@ -367,27 +381,27 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
- _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
-ENDPROC(__copy_user_nocache)
+ _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
+SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index a4a379e..1fbd8ee 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -18,9 +18,6 @@
* rdi source
* rsi destination
* edx len (32bit)
- * ecx sum (32bit)
- * r8 src_err_ptr (int)
- * r9 dst_err_ptr (int)
*
* Output
* eax 64bit sum. undefined in case of exception.
@@ -31,44 +28,32 @@
.macro source
10:
- _ASM_EXTABLE_UA(10b, .Lbad_source)
+ _ASM_EXTABLE_UA(10b, .Lfault)
.endm
.macro dest
20:
- _ASM_EXTABLE_UA(20b, .Lbad_dest)
+ _ASM_EXTABLE_UA(20b, .Lfault)
.endm
- /*
- * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
- * potentially unmapped kernel address.
- */
- .macro ignore L=.Lignore
-30:
- _ASM_EXTABLE(30b, \L)
- .endm
+SYM_FUNC_START(csum_partial_copy_generic)
+ subq $5*8, %rsp
+ movq %rbx, 0*8(%rsp)
+ movq %r12, 1*8(%rsp)
+ movq %r14, 2*8(%rsp)
+ movq %r13, 3*8(%rsp)
+ movq %r15, 4*8(%rsp)
-
-ENTRY(csum_partial_copy_generic)
- cmpl $3*64, %edx
- jle .Lignore
-
-.Lignore:
- subq $7*8, %rsp
- movq %rbx, 2*8(%rsp)
- movq %r12, 3*8(%rsp)
- movq %r14, 4*8(%rsp)
- movq %r13, 5*8(%rsp)
- movq %r15, 6*8(%rsp)
-
- movq %r8, (%rsp)
- movq %r9, 1*8(%rsp)
-
- movl %ecx, %eax
- movl %edx, %ecx
-
+ movl $-1, %eax
xorl %r9d, %r9d
- movq %rcx, %r12
+ movl %edx, %ecx
+ cmpl $8, %ecx
+ jb .Lshort
+
+ testb $7, %sil
+ jne .Lunaligned
+.Laligned:
+ movl %ecx, %r12d
shrq $6, %r12
jz .Lhandle_tail /* < 64 */
@@ -99,7 +84,12 @@
source
movq 56(%rdi), %r13
- ignore 2f
+30:
+ /*
+ * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+ * potentially unmapped kernel address.
+ */
+ _ASM_EXTABLE(30b, 2f)
prefetcht0 5*64(%rdi)
2:
adcq %rbx, %rax
@@ -131,8 +121,6 @@
dest
movq %r13, 56(%rsi)
-3:
-
leaq 64(%rdi), %rdi
leaq 64(%rsi), %rsi
@@ -142,8 +130,8 @@
/* do last up to 56 bytes */
.Lhandle_tail:
- /* ecx: count */
- movl %ecx, %r10d
+ /* ecx: count, rcx.63: the end result needs to be rol8 */
+ movq %rcx, %r10
andl $63, %ecx
shrl $3, %ecx
jz .Lfold
@@ -172,6 +160,7 @@
.Lhandle_7:
movl %r10d, %ecx
andl $7, %ecx
+.L1: /* .Lshort rejoins the common path here */
shrl $1, %ecx
jz .Lhandle_1
movl $2, %edx
@@ -203,26 +192,65 @@
adcl %r9d, %eax /* carry */
.Lende:
- movq 2*8(%rsp), %rbx
- movq 3*8(%rsp), %r12
- movq 4*8(%rsp), %r14
- movq 5*8(%rsp), %r13
- movq 6*8(%rsp), %r15
- addq $7*8, %rsp
+ testq %r10, %r10
+ js .Lwas_odd
+.Lout:
+ movq 0*8(%rsp), %rbx
+ movq 1*8(%rsp), %r12
+ movq 2*8(%rsp), %r14
+ movq 3*8(%rsp), %r13
+ movq 4*8(%rsp), %r15
+ addq $5*8, %rsp
ret
+.Lshort:
+ movl %ecx, %r10d
+ jmp .L1
+.Lunaligned:
+ xorl %ebx, %ebx
+ testb $1, %sil
+ jne .Lodd
+1: testb $2, %sil
+ je 2f
+ source
+ movw (%rdi), %bx
+ dest
+ movw %bx, (%rsi)
+ leaq 2(%rdi), %rdi
+ subq $2, %rcx
+ leaq 2(%rsi), %rsi
+ addq %rbx, %rax
+2: testb $4, %sil
+ je .Laligned
+ source
+ movl (%rdi), %ebx
+ dest
+ movl %ebx, (%rsi)
+ leaq 4(%rdi), %rdi
+ subq $4, %rcx
+ leaq 4(%rsi), %rsi
+ addq %rbx, %rax
+ jmp .Laligned
- /* Exception handlers. Very simple, zeroing is done in the wrappers */
-.Lbad_source:
- movq (%rsp), %rax
- testq %rax, %rax
- jz .Lende
- movl $-EFAULT, (%rax)
- jmp .Lende
+.Lodd:
+ source
+ movb (%rdi), %bl
+ dest
+ movb %bl, (%rsi)
+ leaq 1(%rdi), %rdi
+ leaq 1(%rsi), %rsi
+ /* decrement, set MSB */
+ leaq -1(%rcx, %rcx), %rcx
+ rorq $1, %rcx
+ shll $8, %ebx
+ addq %rbx, %rax
+ jmp 1b
-.Lbad_dest:
- movq 8(%rsp), %rax
- testq %rax, %rax
- jz .Lende
- movl $-EFAULT, (%rax)
- jmp .Lende
-ENDPROC(csum_partial_copy_generic)
+.Lwas_odd:
+ roll $8, %eax
+ jmp .Lout
+
+ /* Exception: just return 0 */
+.Lfault:
+ xorl %eax, %eax
+ jmp .Lout
+SYM_FUNC_END(csum_partial_copy_generic)
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index c66c8b0..1893449 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -10,7 +10,7 @@
#include <asm/smap.h>
/**
- * csum_partial_copy_from_user - Copy and checksum from user space.
+ * csum_and_copy_from_user - Copy and checksum from user space.
* @src: source address (user space)
* @dst: destination address
* @len: number of bytes to be copied.
@@ -21,57 +21,21 @@
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum isum, int *errp)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
{
+ __wsum sum;
+
might_sleep();
- *errp = 0;
-
- if (!likely(access_ok(src, len)))
- goto out_err;
-
- /*
- * Why 6, not 7? To handle odd addresses aligned we
- * would need to do considerable complications to fix the
- * checksum which is defined as an 16bit accumulator. The
- * fix alignment code is primarily for performance
- * compatibility with 32bit and that will handle odd
- * addresses slowly too.
- */
- if (unlikely((unsigned long)src & 6)) {
- while (((unsigned long)src & 6) && len >= 2) {
- __u16 val16;
-
- if (__get_user(val16, (const __u16 __user *)src))
- goto out_err;
-
- *(__u16 *)dst = val16;
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
- stac();
- isum = csum_partial_copy_generic((__force const void *)src,
- dst, len, isum, errp, NULL);
- clac();
- if (unlikely(*errp))
- goto out_err;
-
- return isum;
-
-out_err:
- *errp = -EFAULT;
- memset(dst, 0, len);
-
- return isum;
+ if (!user_access_begin(src, len))
+ return 0;
+ sum = csum_partial_copy_generic((__force const void *)src, dst, len);
+ user_access_end();
+ return sum;
}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(csum_and_copy_from_user);
/**
- * csum_partial_copy_to_user - Copy and checksum to user space.
+ * csum_and_copy_to_user - Copy and checksum to user space.
* @src: source address
* @dst: destination address (user space)
* @len: number of bytes to be copied.
@@ -82,41 +46,18 @@
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_partial_copy_to_user(const void *src, void __user *dst,
- int len, __wsum isum, int *errp)
+csum_and_copy_to_user(const void *src, void __user *dst, int len)
{
- __wsum ret;
+ __wsum sum;
might_sleep();
-
- if (unlikely(!access_ok(dst, len))) {
- *errp = -EFAULT;
+ if (!user_access_begin(dst, len))
return 0;
- }
-
- if (unlikely((unsigned long)dst & 6)) {
- while (((unsigned long)dst & 6) && len >= 2) {
- __u16 val16 = *(__u16 *)src;
-
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- *errp = __put_user(val16, (__u16 __user *)dst);
- if (*errp)
- return isum;
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
-
- *errp = 0;
- stac();
- ret = csum_partial_copy_generic(src, (void __force *)dst,
- len, isum, NULL, errp);
- clac();
- return ret;
+ sum = csum_partial_copy_generic(src, (void __force *)dst, len);
+ user_access_end();
+ return sum;
}
-EXPORT_SYMBOL(csum_partial_copy_to_user);
+EXPORT_SYMBOL(csum_and_copy_to_user);
/**
* csum_partial_copy_nocheck - Copy and checksum.
@@ -128,9 +69,9 @@
* Returns an 32bit unfolded checksum of the buffer.
*/
__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
{
- return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+ return csum_partial_copy_generic(src, dst, len);
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index c126571..65d15df 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -27,9 +27,20 @@
# include <asm/smp.h>
#endif
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
+
/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
{
+ unsigned long loops = (unsigned long)__loops;
+
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
@@ -49,9 +60,9 @@
}
/* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
{
- u64 bclock, now, loops = __loops;
+ u64 bclock, now;
int cpu;
preempt_disable();
@@ -59,7 +70,7 @@
bclock = rdtsc_ordered();
for (;;) {
now = rdtsc_ordered();
- if ((now - bclock) >= loops)
+ if ((now - bclock) >= cycles)
break;
/* Allow RT tasks to run */
@@ -77,7 +88,7 @@
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
- loops -= (now - bclock);
+ cycles -= (now - bclock);
cpu = smp_processor_id();
bclock = rdtsc_ordered();
}
@@ -86,65 +97,96 @@
}
/*
- * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
- * counts with TSC frequency. The input value is the loop of the
- * counter, it will exit when the timer expires.
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
*/
-static void delay_mwaitx(unsigned long __loops)
+static void delay_halt_tpause(u64 start, u64 cycles)
{
- u64 start, end, delay, loops = __loops;
+ u64 until = start + cycles;
+ u32 eax, edx;
+
+ eax = lower_32_bits(until);
+ edx = upper_32_bits(until);
+
+ /*
+ * Hard code the deeper (C0.2) sleep state because exit latency is
+ * small compared to the "microseconds" that usleep() will delay.
+ */
+ __tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
+ * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
+ */
+static void delay_halt_mwaitx(u64 unused, u64 cycles)
+{
+ u64 delay;
+
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
+ /*
+ * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu
+ * variable as the monitor target.
+ */
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+ /*
+ * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not
+ * enter any deep C-state and we use it here in delay() to minimize
+ * wakeup latency.
+ */
+ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+/*
+ * Call a vendor specific function to delay for a given amount of time. Because
+ * these functions may return earlier than requested, check for actual elapsed
+ * time and call again until done.
+ */
+static void delay_halt(u64 __cycles)
+{
+ u64 start, end, cycles = __cycles;
/*
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
* is a store on the memory monitored by MONITORX.
*/
- if (loops == 0)
+ if (!cycles)
return;
start = rdtsc_ordered();
for (;;) {
- delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
-
- /*
- * Use cpu_tss_rw as a cacheline-aligned, seldomly
- * accessed per-cpu variable as the monitor target.
- */
- __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
-
- /*
- * AMD, like Intel's MWAIT version, supports the EAX hint and
- * EAX=0xf0 means, do not enter any deep C-state and we use it
- * here in delay() to minimize wakeup latency.
- */
- __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
-
+ delay_halt_fn(start, cycles);
end = rdtsc_ordered();
- if (loops <= end - start)
+ if (cycles <= end - start)
break;
- loops -= end - start;
-
+ cycles -= end - start;
start = end;
}
}
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
+void __init use_tsc_delay(void)
{
if (delay_fn == delay_loop)
delay_fn = delay_tsc;
}
+void __init use_tpause_delay(void)
+{
+ delay_halt_fn = delay_halt_tpause;
+ delay_fn = delay_halt;
+}
+
void use_mwaitx_delay(void)
{
- delay_fn = delay_mwaitx;
+ delay_halt_fn = delay_halt_mwaitx;
+ delay_fn = delay_halt;
}
int read_current_timer(unsigned long *timer_val)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 9578eb8..fa1bc21 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -35,10 +35,21 @@
#include <asm/smap.h>
#include <asm/export.h>
+#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
+
+#ifdef CONFIG_X86_5LEVEL
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
+ __stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
+#else
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
+#endif
+
.text
-ENTRY(__get_user_1)
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+SYM_FUNC_START(__get_user_1)
+ LOAD_TASK_SIZE_MINUS_N(0)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
@@ -47,91 +58,132 @@
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__get_user_1)
+SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
-ENTRY(__get_user_2)
- add $1,%_ASM_AX
- jc bad_get_user
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+SYM_FUNC_START(__get_user_2)
+ LOAD_TASK_SIZE_MINUS_N(1)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-2: movzwl -1(%_ASM_AX),%edx
+2: movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__get_user_2)
+SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
-ENTRY(__get_user_4)
- add $3,%_ASM_AX
- jc bad_get_user
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+SYM_FUNC_START(__get_user_4)
+ LOAD_TASK_SIZE_MINUS_N(3)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-3: movl -3(%_ASM_AX),%edx
+3: movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__get_user_4)
+SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
-ENTRY(__get_user_8)
+SYM_FUNC_START(__get_user_8)
#ifdef CONFIG_X86_64
- add $7,%_ASM_AX
- jc bad_get_user
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ LOAD_TASK_SIZE_MINUS_N(7)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-4: movq -7(%_ASM_AX),%rdx
+4: movq (%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
ret
#else
- add $7,%_ASM_AX
- jc bad_get_user_8
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ LOAD_TASK_SIZE_MINUS_N(7)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user_8
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-4: movl -7(%_ASM_AX),%edx
-5: movl -3(%_ASM_AX),%ecx
+4: movl (%_ASM_AX),%edx
+5: movl 4(%_ASM_AX),%ecx
xor %eax,%eax
ASM_CLAC
ret
#endif
-ENDPROC(__get_user_8)
+SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
+/* .. and the same for __get_user, just without the range checks */
+SYM_FUNC_START(__get_user_nocheck_1)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+6: movzbl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_1)
+EXPORT_SYMBOL(__get_user_nocheck_1)
-.Lbad_get_user_clac:
+SYM_FUNC_START(__get_user_nocheck_2)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+7: movzwl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_2)
+EXPORT_SYMBOL(__get_user_nocheck_2)
+
+SYM_FUNC_START(__get_user_nocheck_4)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+8: movl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_4)
+EXPORT_SYMBOL(__get_user_nocheck_4)
+
+SYM_FUNC_START(__get_user_nocheck_8)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+#ifdef CONFIG_X86_64
+9: movq (%_ASM_AX),%rdx
+#else
+9: movl (%_ASM_AX),%edx
+10: movl 4(%_ASM_AX),%ecx
+#endif
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_8)
+EXPORT_SYMBOL(__get_user_nocheck_8)
+
+
+SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
ASM_CLAC
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
ret
+SYM_CODE_END(.Lbad_get_user_clac)
#ifdef CONFIG_X86_32
-.Lbad_get_user_8_clac:
+SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac)
ASM_CLAC
bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
ret
+SYM_CODE_END(.Lbad_get_user_8_clac)
#endif
+/* get_user */
_ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_get_user_clac)
@@ -141,3 +193,14 @@
_ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac)
_ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac)
#endif
+
+/* __get_user */
+ _ASM_EXTABLE_UA(6b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(7b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(8b, .Lbad_get_user_clac)
+#ifdef CONFIG_X86_64
+ _ASM_EXTABLE_UA(9b, .Lbad_get_user_clac)
+#else
+ _ASM_EXTABLE_UA(9b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE_UA(10b, .Lbad_get_user_8_clac)
+#endif
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index a14f993..dbf8cc9 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -8,7 +8,7 @@
* unsigned int __sw_hweight32(unsigned int w)
* %rdi: w
*/
-ENTRY(__sw_hweight32)
+SYM_FUNC_START(__sw_hweight32)
#ifdef CONFIG_X86_64
movl %edi, %eax # w
@@ -33,10 +33,10 @@
shrl $24, %eax # w = w_tmp >> 24
__ASM_SIZE(pop,) %__ASM_REG(dx)
ret
-ENDPROC(__sw_hweight32)
+SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)
-ENTRY(__sw_hweight64)
+SYM_FUNC_START(__sw_hweight64)
#ifdef CONFIG_X86_64
pushq %rdi
pushq %rdx
@@ -79,5 +79,5 @@
popl %ecx
ret
#endif
-ENDPROC(__sw_hweight64)
+SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index cbe7e25..c6a19c8 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -20,6 +20,7 @@
enum reg_type {
REG_TYPE_RM = 0,
+ REG_TYPE_REG,
REG_TYPE_INDEX,
REG_TYPE_BASE,
};
@@ -53,6 +54,29 @@
}
/**
+ * insn_has_rep_prefix() - Determine if instruction has a REP prefix
+ * @insn: Instruction containing the prefix to inspect
+ *
+ * Returns:
+ *
+ * true if the instruction has a REP prefix, false if not.
+ */
+bool insn_has_rep_prefix(struct insn *insn)
+{
+ insn_byte_t p;
+ int i;
+
+ insn_get_prefixes(insn);
+
+ for_each_insn_prefix(insn, i, p) {
+ if (p == 0xf2 || p == 0xf3)
+ return true;
+ }
+
+ return false;
+}
+
+/**
* get_seg_reg_override_idx() - obtain segment register override index
* @insn: Valid instruction with segment override prefixes
*
@@ -156,7 +180,7 @@
*/
static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
{
- if (user_64bit_mode(regs))
+ if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE;
/*
* Resolve the default segment register as described in Section 3.7.4
@@ -180,7 +204,7 @@
if (insn->addr_bytes == 2)
return -EINVAL;
- /* fall through */
+ fallthrough;
case -EDOM:
case offsetof(struct pt_regs, bx):
@@ -267,7 +291,7 @@
* which may be invalid at this point.
*/
if (regoff == offsetof(struct pt_regs, ip)) {
- if (user_64bit_mode(regs))
+ if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE;
else
return INAT_SEG_REG_CS;
@@ -290,7 +314,7 @@
* In long mode, segment override prefixes are ignored, except for
* overrides for FS and GS.
*/
- if (user_64bit_mode(regs)) {
+ if (any_64bit_mode(regs)) {
if (idx != INAT_SEG_REG_FS &&
idx != INAT_SEG_REG_GS)
idx = INAT_SEG_REG_IGNORE;
@@ -363,7 +387,6 @@
case INAT_SEG_REG_GS:
return vm86regs->gs;
case INAT_SEG_REG_IGNORE:
- /* fall through */
default:
return -EINVAL;
}
@@ -387,7 +410,6 @@
*/
return get_user_gs(regs);
case INAT_SEG_REG_IGNORE:
- /* fall through */
default:
return -EINVAL;
}
@@ -442,6 +464,13 @@
regno += 8;
break;
+ case REG_TYPE_REG:
+ regno = X86_MODRM_REG(insn->modrm.value);
+
+ if (X86_REX_R(insn->rex_prefix.value))
+ regno += 8;
+ break;
+
case REG_TYPE_INDEX:
regno = X86_SIB_INDEX(insn->sib.value);
if (X86_REX_X(insn->rex_prefix.value))
@@ -647,23 +676,27 @@
*/
return (unsigned long)(sel << 4);
- if (user_64bit_mode(regs)) {
+ if (any_64bit_mode(regs)) {
/*
* Only FS or GS will have a base address, the rest of
* the segments' bases are forced to 0.
*/
unsigned long base;
- if (seg_reg_idx == INAT_SEG_REG_FS)
+ if (seg_reg_idx == INAT_SEG_REG_FS) {
rdmsrl(MSR_FS_BASE, base);
- else if (seg_reg_idx == INAT_SEG_REG_GS)
+ } else if (seg_reg_idx == INAT_SEG_REG_GS) {
/*
* swapgs was called at the kernel entry point. Thus,
* MSR_KERNEL_GS_BASE will have the user-space GS base.
*/
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- else
+ if (user_mode(regs))
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ rdmsrl(MSR_GS_BASE, base);
+ } else {
base = 0;
+ }
return base;
}
@@ -704,7 +737,7 @@
if (sel < 0)
return 0;
- if (user_64bit_mode(regs) || v8086_mode(regs))
+ if (any_64bit_mode(regs) || v8086_mode(regs))
return -1L;
if (!sel)
@@ -783,7 +816,7 @@
*/
return INSN_CODE_SEG_PARAMS(4, 8);
case 3: /* Invalid setting. CS.L=1, CS.D=1 */
- /* fall through */
+ fallthrough;
default:
return -EINVAL;
}
@@ -807,6 +840,21 @@
}
/**
+ * insn_get_modrm_reg_off() - Obtain register in reg part of the ModRM byte
+ * @insn: Instruction containing the ModRM byte
+ * @regs: Register values as seen when entering kernel mode
+ *
+ * Returns:
+ *
+ * The register indicated by the reg part of the ModRM byte. The
+ * register is obtained as an offset from the base of pt_regs.
+ */
+int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs)
+{
+ return get_reg_offset(insn, regs, REG_TYPE_REG);
+}
+
+/**
* get_seg_base_limit() - obtain base address and limit of a segment
* @insn: Instruction. Must be valid.
* @regs: Register values as seen when entering kernel mode
@@ -949,7 +997,7 @@
* following instruction.
*/
if (*regoff == -EDOM) {
- if (user_64bit_mode(regs))
+ if (any_64bit_mode(regs))
tmp = regs->ip + insn->length;
else
tmp = 0;
@@ -1251,7 +1299,7 @@
* After computed, the effective address is treated as an unsigned
* quantity.
*/
- if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
+ if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
goto out;
/*
@@ -1366,3 +1414,124 @@
return (void __user *)-1L;
}
}
+
+unsigned long insn_get_effective_ip(struct pt_regs *regs)
+{
+ unsigned long seg_base = 0;
+
+ /*
+ * If not in user-space long mode, a custom code segment could be in
+ * use. This is true in protected mode (if the process defined a local
+ * descriptor table), or virtual-8086 mode. In most of the cases
+ * seg_base will be zero as in USER_CS.
+ */
+ if (!user_64bit_mode(regs)) {
+ seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+ if (seg_base == -1L)
+ return 0;
+ }
+
+ return seg_base + regs->ip;
+}
+
+/**
+ * insn_fetch_from_user() - Copy instruction bytes from user-space memory
+ * @regs: Structure with register values as seen when entering kernel mode
+ * @buf: Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
+{
+ unsigned long ip;
+ int not_copied;
+
+ ip = insn_get_effective_ip(regs);
+ if (!ip)
+ return 0;
+
+ not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
+
+ return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory
+ * while in atomic code
+ * @regs: Structure with register values as seen when entering kernel mode
+ * @buf: Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf. This function must be used in atomic context.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
+{
+ unsigned long ip;
+ int not_copied;
+
+ ip = insn_get_effective_ip(regs);
+ if (!ip)
+ return 0;
+
+ not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
+
+ return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_decode() - Decode an instruction
+ * @insn: Structure to store decoded instruction
+ * @regs: Structure with register values as seen when entering kernel mode
+ * @buf: Buffer containing the instruction bytes
+ * @buf_size: Number of instruction bytes available in buf
+ *
+ * Decodes the instruction provided in buf and stores the decoding results in
+ * insn. Also determines the correct address and operand sizes.
+ *
+ * Returns:
+ *
+ * True if instruction was decoded, False otherwise.
+ */
+bool insn_decode(struct insn *insn, struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE], int buf_size)
+{
+ int seg_defs;
+
+ insn_init(insn, buf, buf_size, user_64bit_mode(regs));
+
+ /*
+ * Override the default operand and address sizes with what is specified
+ * in the code segment descriptor. The instruction decoder only sets
+ * the address size it to either 4 or 8 address bytes and does nothing
+ * for the operand bytes. This OK for most of the cases, but we could
+ * have special cases where, for instance, a 16-bit code segment
+ * descriptor is used.
+ * If there is an address override prefix, the instruction decoder
+ * correctly updates these values, even for 16-bit defaults.
+ */
+ seg_defs = insn_get_code_seg_params(regs);
+ if (seg_defs == -EINVAL)
+ return false;
+
+ insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
+ insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
+
+ insn_get_length(insn);
+ if (buf_size < insn->length)
+ return false;
+
+ return true;
+}
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 0b5862b..4042795 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -13,6 +13,8 @@
#include <asm/inat.h>
#include <asm/insn.h>
+#include <asm/emulate_prefix.h>
+
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
@@ -58,6 +60,36 @@
insn->addr_bytes = 4;
}
+static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
+static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
+
+static int __insn_get_emulate_prefix(struct insn *insn,
+ const insn_byte_t *prefix, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
+ goto err_out;
+ }
+
+ insn->emulate_prefix_size = len;
+ insn->next_byte += len;
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+static void insn_get_emulate_prefix(struct insn *insn)
+{
+ if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
+ return;
+
+ __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
+}
+
/**
* insn_get_prefixes - scan x86 instruction prefix bytes
* @insn: &struct insn containing instruction
@@ -76,6 +108,8 @@
if (prefixes->got)
return;
+ insn_get_emulate_prefix(insn);
+
nb = 0;
lb = 0;
b = peek_next(insn_byte_t, insn);
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index a9bdf08..cb5a196 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -8,8 +8,8 @@
/*
* override generic version in lib/iomap_copy.c
*/
-ENTRY(__iowrite32_copy)
+SYM_FUNC_START(__iowrite32_copy)
movl %edx,%ecx
rep movsd
ret
-ENDPROC(__iowrite32_copy)
+SYM_FUNC_END(__iowrite32_copy)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index dc2fb88..1e299ac 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,10 +4,11 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
+.pushsection .noinstr.text, "ax"
+
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
@@ -26,7 +27,7 @@
* Output:
* rax original destination
*/
-ENTRY(__memcpy)
+SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_WEAK(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@@ -39,8 +40,8 @@
movl %edx, %ecx
rep movsb
ret
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
+SYM_FUNC_END(memcpy)
+SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
@@ -48,14 +49,14 @@
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
-ENTRY(memcpy_erms)
+SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
-ENDPROC(memcpy_erms)
+SYM_FUNC_END(memcpy_erms)
-ENTRY(memcpy_orig)
+SYM_FUNC_START_LOCAL(memcpy_orig)
movq %rdi, %rax
cmpq $0x20, %rdx
@@ -180,118 +181,6 @@
.Lend:
retq
-ENDPROC(memcpy_orig)
+SYM_FUNC_END(memcpy_orig)
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-ENTRY(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-ENDPROC(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
+.popsection
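/*
 * User-space analogy for the dispatch that ALTERNATIVE_2 sets up in
 * memcpy_64.S above: one of three copy flavours is selected once, based
 * on CPU feature bits.  The kernel patches the jump in place at boot;
 * this sketch uses a function pointer instead, and the feature flags are
 * supplied by hand rather than probed, so it is illustrative only.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static void *copy_orig(void *dst, const void *src, size_t n)
{
	/* Fallback path, analogous to memcpy_orig. */
	unsigned char *d = dst;
	const unsigned char *s = src;

	while (n--)
		*d++ = *s++;
	return dst;
}

static void *copy_rep_good(void *dst, const void *src, size_t n)
{
	return memcpy(dst, src, n);	/* stands in for the rep-movsq path */
}

static void *copy_erms(void *dst, const void *src, size_t n)
{
	return memcpy(dst, src, n);	/* stands in for the rep-movsb path */
}

static void *(*copy_fn)(void *, const void *, size_t) = copy_orig;

static void select_copy(int has_rep_good, int has_erms)
{
	if (has_erms)
		copy_fn = copy_erms;
	else if (has_rep_good)
		copy_fn = copy_rep_good;
}

int main(void)
{
	char src[] = "alternative dispatch", dst[sizeof(src)];

	select_copy(1, 1);		/* assume both features for the demo */
	copy_fn(dst, src, sizeof(src));
	printf("%s\n", dst);
	return 0;
}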
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index b292445..41902fe 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -25,12 +25,9 @@
* rax: dest
*/
SYM_FUNC_START_WEAK(memmove)
-ENTRY(__memmove)
+SYM_FUNC_START(__memmove)
- /* Handle more 32 bytes in loop */
mov %rdi, %rax
- cmp $0x20, %rdx
- jb 1f
/* Decide forward/backward copy mode */
cmp %rdi, %rsi
@@ -40,7 +37,9 @@
cmp %rdi, %r8
jg 2f
+ /* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
+ ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
/*
@@ -112,6 +111,8 @@
*/
.p2align 4
2:
+ cmp $0x20, %rdx
+ jb 1f
cmp $680, %rdx
jb 6f
cmp %dil, %sil
@@ -205,7 +206,7 @@
movb %r11b, (%rdi)
13:
retq
-ENDPROC(__memmove)
-ENDPROC(memmove)
+SYM_FUNC_END(__memmove)
+SYM_FUNC_END_ALIAS(memmove)
EXPORT_SYMBOL(__memmove)
EXPORT_SYMBOL(memmove)
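/*
 * Sketch of the forward/backward decision that __memmove makes in the
 * hunk above ("cmp %rdi, %rsi ... jg 2f"): when the destination starts
 * inside the source range, copy from the end so overlapping bytes are
 * not clobbered before they are read.  Plain C, with no FSRM/ERMS fast
 * path; illustrative only.
 */
#include <stddef.h>
#include <stdio.h>

static void *toy_memmove(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if (d <= s || d >= s + n) {
		/* No harmful overlap: forward copy is safe. */
		while (n--)
			*d++ = *s++;
	} else {
		/* dst overlaps the tail of src: copy backwards. */
		d += n;
		s += n;
		while (n--)
			*--d = *--s;
	}
	return dst;
}

int main(void)
{
	char buf[] = "abcdef";

	toy_memmove(buf + 2, buf, 4);	/* overlapping move */
	printf("%s\n", buf);		/* prints "ababcd" */
	return 0;
}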
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index e3376c7..0bfd26e 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -18,7 +18,7 @@
* rax original destination
*/
SYM_FUNC_START_WEAK(memset)
-ENTRY(__memset)
+SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
@@ -41,8 +41,8 @@
rep stosb
movq %r9,%rax
ret
-ENDPROC(memset)
-ENDPROC(__memset)
+SYM_FUNC_END(__memset)
+SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
@@ -57,16 +57,16 @@
*
* rax original destination
*/
-ENTRY(memset_erms)
+SYM_FUNC_START_LOCAL(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
-ENDPROC(memset_erms)
+SYM_FUNC_END(memset_erms)
-ENTRY(memset_orig)
+SYM_FUNC_START_LOCAL(memset_orig)
movq %rdi,%r10
/* expand byte value */
@@ -137,4 +137,4 @@
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
-ENDPROC(memset_orig)
+SYM_FUNC_END(memset_orig)
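/*
 * Sketch of the "expand byte value" step in memset_orig above: the fill
 * byte is replicated into all eight lanes of a 64-bit word (the multiply
 * by 0x0101010101010101) so the main loop can store whole quadwords
 * instead of single bytes.  Stand-alone demo, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t expand_byte(uint8_t c)
{
	return (uint64_t)c * 0x0101010101010101ULL;
}

int main(void)
{
	uint64_t pattern = expand_byte(0xab);
	uint64_t buf[4];
	size_t i;

	/* Quadword fill loop, analogous to the rep-stosq body. */
	for (i = 0; i < sizeof(buf) / sizeof(buf[0]); i++)
		buf[i] = pattern;

	printf("0x%016llx\n", (unsigned long long)buf[0]); /* 0xabab...ab */
	return 0;
}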
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index ed33cba..a2b9caa 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -12,7 +12,7 @@
*
*/
.macro op_safe_regs op
-ENTRY(\op\()_safe_regs)
+SYM_FUNC_START(\op\()_safe_regs)
pushq %rbx
pushq %r12
movq %rdi, %r10 /* Save pointer */
@@ -41,13 +41,13 @@
jmp 2b
_ASM_EXTABLE(1b, 3b)
-ENDPROC(\op\()_safe_regs)
+SYM_FUNC_END(\op\()_safe_regs)
.endm
#else /* X86_32 */
.macro op_safe_regs op
-ENTRY(\op\()_safe_regs)
+SYM_FUNC_START(\op\()_safe_regs)
pushl %ebx
pushl %ebp
pushl %esi
@@ -83,7 +83,7 @@
jmp 2b
_ASM_EXTABLE(1b, 3b)
-ENDPROC(\op\()_safe_regs)
+SYM_FUNC_END(\op\()_safe_regs)
.endm
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 126dd6a..0ea344c 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -25,77 +25,89 @@
* Inputs: %eax[:%edx] contains the data
* %ecx contains the address
*
- * Outputs: %eax is error code (0 or -EFAULT)
+ * Outputs: %ecx is error code (0 or -EFAULT)
+ *
+ * Clobbers: %ebx needed for task pointer
*
* These functions should not modify any other registers,
* as they get called from within inline assembly.
*/
-#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX
+#ifdef CONFIG_X86_5LEVEL
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
+ __stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
+#else
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
+#endif
.text
-ENTRY(__put_user_1)
- ENTER
- cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
+SYM_FUNC_START(__put_user_1)
+ LOAD_TASK_SIZE_MINUS_N(0)
+ cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
ASM_STAC
1: movb %al,(%_ASM_CX)
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
ret
-ENDPROC(__put_user_1)
+SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
+EXPORT_SYMBOL(__put_user_nocheck_1)
-ENTRY(__put_user_2)
- ENTER
- mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
- sub $1,%_ASM_BX
+SYM_FUNC_START(__put_user_2)
+ LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
ASM_STAC
2: movw %ax,(%_ASM_CX)
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
ret
-ENDPROC(__put_user_2)
+SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
+EXPORT_SYMBOL(__put_user_nocheck_2)
-ENTRY(__put_user_4)
- ENTER
- mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
- sub $3,%_ASM_BX
+SYM_FUNC_START(__put_user_4)
+ LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
ASM_STAC
3: movl %eax,(%_ASM_CX)
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
ret
-ENDPROC(__put_user_4)
+SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
+EXPORT_SYMBOL(__put_user_nocheck_4)
-ENTRY(__put_user_8)
- ENTER
- mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
- sub $7,%_ASM_BX
+SYM_FUNC_START(__put_user_8)
+ LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
ASM_STAC
4: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
5: movl %edx,4(%_ASM_CX)
#endif
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
RET
-ENDPROC(__put_user_8)
+SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
+EXPORT_SYMBOL(__put_user_nocheck_8)
-.Lbad_put_user_clac:
+SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
ASM_CLAC
.Lbad_put_user:
- movl $-EFAULT,%eax
+ movl $-EFAULT,%ecx
RET
+SYM_CODE_END(.Lbad_put_user_clac)
_ASM_EXTABLE_UA(1b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
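/*
 * Sketch of the user-pointer range check that LOAD_TASK_SIZE_MINUS_N()
 * sets up in the putuser.S hunk above: an N-byte store at addr is
 * rejected when addr >= TASK_SIZE_MAX - (N - 1), so the whole access
 * stays below the user/kernel boundary.  The constant below mirrors the
 * 4-level paging value used in the macro ((1 << 47) - 4096); with LA57
 * the macro substitutes (1 << 56) - 4096 instead.  Illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_TASK_SIZE_MAX	((1ULL << 47) - 4096)

static int put_user_range_ok(uint64_t addr, unsigned int size)
{
	/* Same comparison as "cmp %rbx,%rcx; jae .Lbad_put_user". */
	return addr < TOY_TASK_SIZE_MAX - (size - 1);
}

int main(void)
{
	printf("%d\n", put_user_range_ok(0x00007f0000000000ULL, 8)); /* 1: ok */
	printf("%d\n", put_user_range_ok(TOY_TASK_SIZE_MAX - 4, 8)); /* 0: spans limit */
	return 0;
}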
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index c909961..b4c43a9 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,15 +7,31 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
+#include <asm/frame.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
-ENTRY(__x86_indirect_thunk_\reg)
- CFI_STARTPROC
- JMP_NOSPEC %\reg
- CFI_ENDPROC
-ENDPROC(__x86_indirect_thunk_\reg)
+ .align 32
+SYM_FUNC_START(__x86_indirect_thunk_\reg)
+ JMP_NOSPEC \reg
+SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ UNWIND_HINT_EMPTY
+ pause
+ lfence
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov %\reg, (%_ASM_SP)
+ UNWIND_HINT_RET_OFFSET
+ ret
+SYM_FUNC_END(__x86_retpoline_\reg)
+
.endm
/*
@@ -24,25 +40,24 @@
* only see one instance of "__x86_indirect_thunk_\reg" rather
* than one per register with the correct names. So we do it
* the simple and nasty way...
+ *
+ * Worse, you can only have a single EXPORT_SYMBOL per line,
+ * and CPP can't insert newlines, so we have to repeat everything
+ * at least twice.
*/
-#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
-GENERATE_THUNK(_ASM_AX)
-GENERATE_THUNK(_ASM_BX)
-GENERATE_THUNK(_ASM_CX)
-GENERATE_THUNK(_ASM_DX)
-GENERATE_THUNK(_ASM_SI)
-GENERATE_THUNK(_ASM_DI)
-GENERATE_THUNK(_ASM_BP)
-#ifdef CONFIG_64BIT
-GENERATE_THUNK(r8)
-GENERATE_THUNK(r9)
-GENERATE_THUNK(r10)
-GENERATE_THUNK(r11)
-GENERATE_THUNK(r12)
-GENERATE_THUNK(r13)
-GENERATE_THUNK(r14)
-GENERATE_THUNK(r15)
-#endif
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg)
+
+#undef GEN
+#define GEN(reg) THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
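/*
 * Sketch of the "redefine GEN and re-include" pattern used above with
 * <asm/GEN-for-each-reg.h>: a single per-register list is expanded
 * several times, each pass with a different GEN() body, so the thunks,
 * their exports and the retpoline exports all come from one list.
 * FOR_EACH_COLOR below is an illustrative stand-in for the real
 * per-register include; in C a one-line macro list is enough, whereas
 * the assembly side re-includes a header because each EXPORT_SYMBOL
 * must land on its own line, as the comment above notes.
 */
#include <stdio.h>

#define FOR_EACH_COLOR	GEN(red) GEN(green) GEN(blue)

/* First pass: emit one counter variable per item. */
#define GEN(name)	static int name##_hits;
FOR_EACH_COLOR
#undef GEN

/* Second pass: emit one accessor function per item. */
#define GEN(name)	static void touch_##name(void) { name##_hits++; }
FOR_EACH_COLOR
#undef GEN

int main(void)
{
	touch_red();
	touch_red();
	touch_green();
	touch_blue();
	printf("red=%d green=%d blue=%d\n", red_hits, green_hits, blue_hits);
	return 0;
}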
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 1847e99..508c81e 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -56,27 +56,6 @@
}
EXPORT_SYMBOL(clear_user);
-/*
- * Similar to copy_user_handle_tail, probe for the write fault point,
- * but reuse __memcpy_mcsafe in case a new read error is encountered.
- * clac() is handled in _copy_to_iter_mcsafe().
- */
-__visible notrace unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5cb9f00..ec31f5b 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -366,7 +366,7 @@
1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
1c: Grp20 (1A),(1C)
1d:
-1e:
+1e: Grp21 (1A)
1f: NOP Ev
# 0x0f 0x20-0x2f
20: MOV Rd,Cd
@@ -695,16 +695,28 @@
4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
-# Skip 0x50-0x57
+50: vpdpbusd Vx,Hx,Wx (66),(ev)
+51: vpdpbusds Vx,Hx,Wx (66),(ev)
+52: vdpbf16ps Vx,Hx,Wx (F3),(ev) | vpdpwssd Vx,Hx,Wx (66),(ev) | vp4dpwssd Vdqq,Hdqq,Wdq (F2),(ev)
+53: vpdpwssds Vx,Hx,Wx (66),(ev) | vp4dpwssds Vdqq,Hdqq,Wdq (F2),(ev)
+54: vpopcntb/w Vx,Wx (66),(ev)
+55: vpopcntd/q Vx,Wx (66),(ev)
58: vpbroadcastd Vx,Wx (66),(v)
59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x63
+# Skip 0x5c-0x61
+62: vpexpandb/w Vx,Wx (66),(ev)
+63: vpcompressb/w Wx,Vx (66),(ev)
64: vpblendmd/q Vx,Hx,Wx (66),(ev)
65: vblendmps/d Vx,Hx,Wx (66),(ev)
66: vpblendmb/w Vx,Hx,Wx (66),(ev)
-# Skip 0x67-0x74
+68: vp2intersectd/q Kx,Hx,Wx (F2),(ev)
+# Skip 0x69-0x6f
+70: vpshldvw Vx,Hx,Wx (66),(ev)
+71: vpshldvd/q Vx,Hx,Wx (66),(ev)
+72: vcvtne2ps2bf16 Vx,Hx,Wx (F2),(ev) | vcvtneps2bf16 Vx,Wx (F3),(ev) | vpshrdvw Vx,Hx,Wx (66),(ev)
+73: vpshrdvd/q Vx,Hx,Wx (66),(ev)
75: vpermi2b/w Vx,Hx,Wx (66),(ev)
76: vpermi2d/q Vx,Hx,Wx (66),(ev)
77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
@@ -727,6 +739,7 @@
8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
8d: vpermb/w Vx,Hx,Wx (66),(ev)
8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+8f: vpshufbitqmb Kx,Hx,Wx (66),(ev)
# 0x0f 0x38 0x90-0xbf (FMA)
90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
@@ -738,8 +751,8 @@
97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
-9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
-9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) | v4fmaddps Vdqq,Hdqq,Wdq (F2),(ev)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) | v4fmaddss Vdq,Hdq,Wdq (F2),(ev)
9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
@@ -752,8 +765,8 @@
a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
-aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
-ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) | v4fnmaddps Vdqq,Hdqq,Wdq (F2),(ev)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) | v4fnmaddss Vdq,Hdq,Wdq (F2),(ev)
ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
@@ -780,17 +793,18 @@
cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
+cf: vgf2p8mulb Vx,Wx (66)
db: VAESIMC Vdq,Wdq (66),(v1)
-dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
-dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
-de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
-df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+dc: vaesenc Vx,Hx,Wx (66)
+dd: vaesenclast Vx,Hx,Wx (66)
+de: vaesdec Vx,Hx,Wx (66)
+df: vaesdeclast Vx,Hx,Wx (66)
f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
f2: ANDN Gy,By,Ey (v)
f3: Grp17 (1A)
-f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
-f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
f9: MOVDIRI My,Gy
@@ -848,7 +862,7 @@
41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
-44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+44: vpclmulqdq Vx,Hx,Wx,Ib (66)
46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
@@ -865,7 +879,13 @@
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
66: vfpclassps/d Vk,Wx,Ib (66),(ev)
67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
+71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
+72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
+73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
cc: sha1rnds4 Vdq,Wdq,Ib
+ce: vgf2p8affineqb Vx,Wx,Ib (66)
+cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
f0: RORX Gy,Ey,Ib (F2),(v)
EndTable
@@ -950,7 +970,7 @@
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -1021,8 +1041,8 @@
2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE | ptwrite Ey (F3),(11B)
-5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
+5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
@@ -1057,6 +1077,11 @@
0: cldemote Mb
EndTable
+GrpTable: Grp21
+1: RDSSPD/Q Ry (F3),(11B)
+7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B)
+EndTable
+
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH