Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 7757f73..1aaef5b 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
*_32
*_64
single_step_syscall
@@ -11,5 +12,4 @@
iopl
mpx-mini-test
ioperm
-protection_keys
test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5d49bfe..6703c79 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,8 +12,8 @@
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
- protection_keys test_vdso test_vsyscall mov_ss_trap \
- syscall_arg_fault
+ test_vdso test_vsyscall mov_ss_trap \
+ syscall_arg_fault fsgsbase_restore
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
@@ -70,10 +70,10 @@
EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
-$(BINARIES_32): $(OUTPUT)/%_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
-$(BINARIES_64): $(OUTPUT)/%_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h
$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
# x86_64 users should be encouraged to install 32-bit libraries
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 757bdb2..7161cfc 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -285,7 +285,8 @@
/* 32-bit set_thread_area */
long ret;
asm volatile ("int $0x80"
- : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "=a" (ret), "+m" (*low_desc)
+ : "a" (243), "b" (low_desc)
: "r8", "r9", "r10", "r11");
memcpy(&desc, low_desc, sizeof(desc));
munmap(low_desc, sizeof(desc));
@@ -551,11 +552,28 @@
* selector value is changed or not by the GSBASE write in
* a ptracer.
*/
- if (gs == 0 && base == 0xFF) {
- printf("[OK]\tGS was reset as expected\n");
- } else {
+ if (gs != *shared_scratch) {
nerrs++;
- printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
+ printf("[FAIL]\tGS changed to %lx\n", gs);
+
+ /*
+ * On older kernels, poking a nonzero value into the
+ * base would zero the selector. On newer kernels,
+ * this behavior has changed -- poking the base
+ * changes only the base and, if FSGSBASE is not
+ * available, this may have no effect once the tracee
+ * is resumed.
+ */
+ if (gs == 0)
+ printf("\tNote: this is expected behavior on older kernels.\n");
+ } else if (have_fsgsbase && (base != 0xFF)) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to %lx\n", base);
+ } else {
+ printf("[OK]\tGS remained 0x%hx", *shared_scratch);
+ if (have_fsgsbase)
+ printf(" and GSBASE changed to 0xFF");
+ printf("\n");
}
}
diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c
new file mode 100644
index 0000000..6fffadc
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase_restore.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * fsgsbase_restore.c, test ptrace vs fsgsbase
+ * Copyright (c) 2020 Andy Lutomirski
+ *
+ * This test case simulates a tracer redirecting tracee execution to
+ * a function and then restoring tracee state using PTRACE_GETREGS and
+ * PTRACE_SETREGS. This is similar to what gdb does when doing
+ * 'p func()'. The catch is that this test has the called function
+ * modify a segment register. This makes sure that ptrace correctly
+ * restores segment state when using PTRACE_SETREGS.
+ *
+ * This is not part of fsgsbase.c, because that test is 64-bit only.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <stdint.h>
+
+#define EXPECTED_VALUE 0x1337f00d
+
+#ifdef __x86_64__
+# define SEG "%gs"
+#else
+# define SEG "%fs"
+#endif
+
+static unsigned int dereference_seg_base(void)
+{
+	unsigned int ret;	/* 32-bit word read from offset 0 of the SEG segment */
+	asm volatile ("mov %" SEG ":(0), %0" : "=r" (ret));	/* "=r" only: mov cannot be memory-to-memory */
+	return ret;
+}
+
+static void init_seg(void)
+{
+	/* Point SEG at a private word holding EXPECTED_VALUE: LDT slot 0 first, else a GDT slot. */
+	unsigned int *target = mmap(
+		NULL, sizeof(unsigned int),
+		PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+	if (target == MAP_FAILED)
+		err(1, "mmap");
+
+	*target = EXPECTED_VALUE;
+
+	printf("\tsegment base address = 0x%lx\n", (unsigned long)target);
+
+	struct user_desc desc = {
+		.entry_number    = 0,
+		.base_addr       = (unsigned int)(uintptr_t)target,
+		.limit           = sizeof(unsigned int) - 1,
+		.seg_32bit       = 1,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+		printf("\tusing LDT slot 0\n");
+		asm volatile ("mov %0, %" SEG :: "rm" ((unsigned short)0x7));	/* index 0, LDT, RPL 3 */
+	} else {
+		/* No modify_ldt for us (configured out, perhaps) */
+		struct user_desc *low_desc = mmap(
+			NULL, sizeof(desc),
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+		if (low_desc == MAP_FAILED)
+			err(1, "mmap");
+		memcpy(low_desc, &desc, sizeof(desc));
+		low_desc->entry_number = -1;	/* -1: let the kernel choose the slot (printed below) */
+		/* 32-bit set_thread_area */
+		long ret;
+		asm volatile ("int $0x80"
+			      : "=a" (ret), "+m" (*low_desc)
+			      : "a" (243), "b" (low_desc)
+#ifdef __x86_64__
+			      : "r8", "r9", "r10", "r11"
+#endif
+			);
+		memcpy(&desc, low_desc, sizeof(desc));
+		munmap(low_desc, sizeof(desc));
+
+		if (ret != 0) {
+			printf("[NOTE]\tcould not create a segment -- can't test anything\n");
+			exit(0);
+		}
+		printf("\tusing GDT slot %d\n", desc.entry_number);
+
+		unsigned short sel = (unsigned short)((desc.entry_number << 3) | 0x3);
+		asm volatile ("mov %0, %" SEG :: "rm" (sel));
+	}
+}
+
+static void tracee_zap_segment(void)
+{
+ /*
+ * The tracer will redirect execution here. This is meant to
+ * work like gdb's 'p func()' feature. The tricky bit is that
+ * we modify a segment register in order to make sure that ptrace
+ * can correctly restore segment registers.
+ */
+ printf("\tTracee: in tracee_zap_segment()\n");
+
+ /*
+ * Write a nonzero selector with base zero to the segment register.
+ * Using a null selector would defeat the test on AMD pre-Zen2
+ * CPUs, as such CPUs don't clear the base when loading a null
+ * selector.
+ */
+ unsigned short sel;
+ asm volatile ("mov %%ss, %0\n\t" /* sel = current stack-segment selector */
+ "mov %0, %" SEG /* load that selector into SEG */
+ : "=rm" (sel));
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tTracee is going back to sleep\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP); /* stop this thread so the tracer's waitpid() returns */
+
+ /* Should not get here: the tracer rewrites our registers while we are stopped. */
+ while (true) {
+ printf("[FAIL]\tTracee hit unreachable code\n");
+ pause();
+ }
+}
+
+int main()
+{
+	/* Set up a segment, then check that ptrace preserves it across a hijacked call. */
+	printf("\tSetting up a segment\n");
+	init_seg();
+	unsigned int val = dereference_seg_base();
+	if (val != EXPECTED_VALUE) {
+		printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+		return 1;
+	}
+	printf("[OK]\tThe segment points to the right place.\n");
+
+	pid_t chld = fork();
+	if (chld < 0)
+		err(1, "fork");
+
+	if (chld == 0) {
+		prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0, 0);
+
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+		printf("\tTracee will take a nap until signaled\n");
+		syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+		printf("\tTracee was resumed.  Will re-check segment.\n");
+
+		val = dereference_seg_base();
+		if (val != EXPECTED_VALUE) {
+			printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+			exit(1);
+		}
+
+		printf("[OK]\tThe segment points to the right place.\n");
+		exit(0);
+	}
+
+	int status;
+
+	/* Wait for the tracee's initial, self-sent SIGSTOP. */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	struct user_regs_struct regs;
+
+	if (ptrace(PTRACE_GETREGS, chld, NULL, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+#ifdef __x86_64__
+	printf("\tChild GS=0x%lx, GSBASE=0x%lx\n", (unsigned long)regs.gs, (unsigned long)regs.gs_base);
+#else
+	printf("\tChild FS=0x%lx\n", (unsigned long)regs.xfs);
+#endif
+
+	struct user_regs_struct regs2 = regs;
+#ifdef __x86_64__
+	regs2.rip = (unsigned long)tracee_zap_segment;
+	regs2.rsp -= 128;	/* Don't clobber the redzone. */
+#else
+	regs2.eip = (unsigned long)tracee_zap_segment;
+#endif
+
+	printf("\tTracer: redirecting tracee to tracee_zap_segment()\n");
+	if (ptrace(PTRACE_SETREGS, chld, NULL, &regs2) != 0)
+		err(1, "PTRACE_SETREGS");
+	if (ptrace(PTRACE_CONT, chld, NULL, NULL) != 0)
+		err(1, "PTRACE_CONT");
+
+	/* Wait for the SIGSTOP the tracee sends itself in tracee_zap_segment(). */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	printf("\tTracer: restoring tracee state\n");
+	if (ptrace(PTRACE_SETREGS, chld, NULL, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+	if (ptrace(PTRACE_DETACH, chld, NULL, NULL) != 0)
+		err(1, "PTRACE_DETACH");
+
+	/* Wait for the detached tracee to terminate. */
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+
+	if (WIFSIGNALED(status)) {
+		printf("[FAIL]\tTracee crashed\n");
+		return 1;
+	}
+
+	if (!WIFEXITED(status)) {
+		printf("[FAIL]\tTracee stopped for an unexpected reason: %d\n", status);
+		return 1;
+	}
+
+	int exitcode = WEXITSTATUS(status);
+	if (exitcode != 0) {
+		printf("[FAIL]\tTracee reported failure\n");
+		return 1;
+	}
+
+	printf("[OK]\tAll is well.\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
new file mode 100644
index 0000000..f5ff2a2
--- /dev/null
+++ b/tools/testing/selftests/x86/helpers.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __SELFTESTS_X86_HELPERS_H
+#define __SELFTESTS_X86_HELPERS_H
+
+#include <asm/processor-flags.h>
+
+static inline unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ /* Return the flags register: push it onto the stack, then pop it into a register. */
+ asm volatile (
+#ifdef __x86_64__
+ "subq $128, %%rsp\n\t" /* presumably steps over the 128-byte red zone -- TODO confirm */
+ "pushfq\n\t"
+ "popq %0\n\t"
+ "addq $128, %%rsp" /* undo the stack adjustment made above */
+#else
+ "pushfl\n\t"
+ "popl %0"
+#endif
+ : "=r" (eflags) :: "memory");
+
+ return eflags;
+}
+
+static inline void set_eflags(unsigned long eflags)
+{
+ asm volatile ( /* load @eflags into the flags register: push the value, then popf */
+#ifdef __x86_64__
+ "subq $128, %%rsp\n\t" /* presumably steps over the 128-byte red zone -- TODO confirm */
+ "pushq %0\n\t"
+ "popfq\n\t"
+ "addq $128, %%rsp" /* undo the stack adjustment made above */
+#else
+ "pushl %0\n\t"
+ "popfl"
+#endif
+ :: "r" (eflags) : "flags", "memory");
+}
+
+#endif /* __SELFTESTS_X86_HELPERS_H */
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
index 01de41c..57ec5e9 100644
--- a/tools/testing/selftests/x86/ioperm.c
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -131,6 +131,17 @@
printf("[RUN]\tchild: check that we inherited permissions\n");
expect_ok(0x80);
expect_gp(0xed);
+ printf("[RUN]\tchild: Extend permissions to 0x81\n");
+ if (ioperm(0x81, 1, 1) != 0) {
+ printf("[FAIL]\tioperm(0x81, 1, 1) failed (%d)", errno);
+ return 1;
+ }
+ printf("[RUN]\tchild: Drop permissions to 0x80\n");
+ if (ioperm(0x80, 1, 0) != 0) {
+ printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+ return 1;
+ }
+ expect_gp(0x80);
return 0;
} else {
int status;
@@ -146,8 +157,11 @@
}
}
- /* Test the capability checks. */
+ /* Verify that the child dropping 0x80 did not affect the parent */
+ printf("\tVerify that unsharing the bitmap worked\n");
+ expect_ok(0x80);
+ /* Test the capability checks. */
printf("\tDrop privileges\n");
if (setresuid(1, 1, 1) != 0) {
printf("[WARN]\tDropping privileges failed\n");
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index 6aa27f3..7e3e09c 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -35,6 +35,16 @@
}
+static void clearhandler(int sig)
+{
+	/* Put @sig back to its default disposition; abort the test on error. */
+	struct sigaction dfl = { .sa_handler = SIG_DFL };
+
+	sigemptyset(&dfl.sa_mask);
+	if (sigaction(sig, &dfl, 0) != 0)
+		err(1, "sigaction");
+}
+
static jmp_buf jmpbuf;
static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
@@ -42,25 +52,167 @@
siglongjmp(jmpbuf, 1);
}
+static bool try_outb(unsigned short port)
+{
+	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+	if (sigsetjmp(jmpbuf, 1) != 0)
+		return false;	/* faulted; SA_RESETHAND should already have disarmed the handler */
+
+	asm volatile ("outb %%al, %w[port]"
+		      : : [port] "Nd" (port), "a" (0));
+	/* No fault: the handler is still armed, so disarm it before returning. */
+	clearhandler(SIGSEGV);
+	return true;
+}
+
+static void expect_ok_outb(unsigned short port)
+{
+	if (try_outb(port)) {
+		printf("[OK]\toutb to 0x%02hx worked\n", port);
+		return;
+	}
+	printf("[FAIL]\toutb to 0x%02hx failed\n", port);
+	exit(1);	/* a denied write here ends the whole test immediately */
+}
+
+static void expect_gp_outb(unsigned short port)
+{
+	if (try_outb(port)) {	/* the write was expected to fault */
+		printf("[FAIL]\toutb to 0x%02hx worked\n", port);
+		nerrs++;
+		return;	/* do not also report [OK] for the same write */
+	}
+	printf("[OK]\toutb to 0x%02hx failed\n", port);
+}
+
+#define RET_FAULTED 0
+#define RET_FAIL 1
+#define RET_EMUL 2
+
+static int try_cli(void)
+{
+	unsigned long flags;
+
+	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+	if (sigsetjmp(jmpbuf, 1) != 0)
+		return RET_FAULTED;	/* SA_RESETHAND should already have disarmed the handler */
+
+	asm volatile("cli; pushf; pop %[flags]"
+		     : [flags] "=rm" (flags));
+
+	/* No fault: the handler is still armed, so disarm it before returning. */
+	clearhandler(SIGSEGV);
+
+	/* X86_EFLAGS_IF (bit 9) clear means CLI really disabled interrupts. */
+	if (!(flags & (1 << 9)))
+		return RET_FAIL;
+	return RET_EMUL;
+}
+
+static int try_sti(bool irqs_off)
+{
+	unsigned long flags;
+
+	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+	if (sigsetjmp(jmpbuf, 1) != 0)
+		return RET_FAULTED;	/* SA_RESETHAND should already have disarmed the handler */
+
+	asm volatile("sti; pushf; pop %[flags]"
+		     : [flags] "=rm" (flags));
+
+	/* No fault: the handler is still armed, so disarm it before returning. */
+	clearhandler(SIGSEGV);
+
+	/* X86_EFLAGS_IF (bit 9) set after interrupts were off means STI took effect. */
+	if (irqs_off && (flags & (1 << 9)))
+		return RET_FAIL;
+	return RET_EMUL;
+}
+
+static void expect_gp_sti(bool irqs_off)
+{
+	/*
+	 * Run STI and report the outcome; only RET_FAULTED and RET_EMUL pass.
+	 */
+	int outcome = try_sti(irqs_off);
+
+	if (outcome == RET_FAULTED) {
+		printf("[OK]\tSTI faulted\n");
+	} else if (outcome == RET_EMUL) {
+		printf("[OK]\tSTI NOPped\n");
+	} else {
+		printf("[FAIL]\tSTI worked\n");
+		nerrs++;
+	}
+}
+
+/*
+ * Returns whether it managed to disable interrupts.
+ */
+static bool test_cli(void)
+{
+	int outcome = try_cli();
+
+	if (outcome == RET_FAULTED) {
+		printf("[OK]\tCLI faulted\n");
+		return false;
+	}
+
+	if (outcome == RET_EMUL) {
+		printf("[OK]\tCLI NOPped\n");
+		return false;
+	}
+
+	/* Any other result means CLI really disabled interrupts. */
+	printf("[FAIL]\tCLI worked\n");
+	nerrs++;
+	return true;
+}
+
int main(void)
{
cpu_set_t cpuset;
+
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
err(1, "sched_setaffinity to CPU 0");
/* Probe for iopl support. Note that iopl(0) works even as nonroot. */
- if (iopl(3) != 0) {
+ switch(iopl(3)) {
+ case 0:
+ break;
+ case -ENOSYS:
+ printf("[OK]\tiopl() nor supported\n");
+ return 0;
+ default:
printf("[OK]\tiopl(3) failed (%d) -- try running as root\n",
errno);
return 0;
}
- /* Restore our original state prior to starting the test. */
+ /* Make sure that CLI/STI are blocked even with IOPL level 3 */
+ expect_gp_sti(test_cli());
+ expect_ok_outb(0x80);
+
+ /* Establish an I/O bitmap to test the restore */
+ if (ioperm(0x80, 1, 1) != 0)
+ err(1, "ioperm(0x80, 1, 1) failed\n");
+
+ /* Restore our original state prior to starting the fork test. */
if (iopl(0) != 0)
err(1, "iopl(0)");
+ /*
+ * Verify that IOPL emulation is disabled and the I/O bitmap still
+ * works.
+ */
+ expect_ok_outb(0x80);
+ expect_gp_outb(0xed);
+ /* Drop the I/O bitmap */
+ if (ioperm(0x80, 1, 0) != 0)
+ err(1, "ioperm(0x80, 1, 0) failed\n");
+
pid_t child = fork();
if (child == -1)
err(1, "fork");
@@ -90,14 +242,8 @@
printf("[RUN]\tparent: write to 0x80 (should fail)\n");
- sethandler(SIGSEGV, sigsegv, 0);
- if (sigsetjmp(jmpbuf, 1) != 0) {
- printf("[OK]\twrite was denied\n");
- } else {
- asm volatile ("outb %%al, $0x80" : : "a" (0));
- printf("[FAIL]\twrite was allowed\n");
- nerrs++;
- }
+ expect_gp_outb(0x80);
+ expect_gp_sti(test_cli());
/* Test the capability checks. */
printf("\tiopl(3)\n");
@@ -133,4 +279,3 @@
done:
return nerrs ? 1 : 0;
}
-
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
deleted file mode 100644
index 254e543..0000000
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PKEYS_HELPER_H
-#define _PKEYS_HELPER_H
-#define _GNU_SOURCE
-#include <string.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-
-#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
-extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-static inline void sigsafe_printf(const char *format, ...)
-{
- va_list ap;
-
- if (!dprint_in_signal) {
- va_start(ap, format);
- vprintf(format, ap);
- va_end(ap);
- } else {
- int ret;
- /*
- * No printf() functions are signal-safe.
- * They deadlock easily. Write the format
- * string to get some output, even if
- * incomplete.
- */
- ret = write(1, format, strlen(format));
- if (ret < 0)
- exit(1);
- }
-}
-#define dprintf_level(level, args...) do { \
- if (level <= DEBUG_LEVEL) \
- sigsafe_printf(args); \
-} while (0)
-#define dprintf0(args...) dprintf_level(0, args)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkru;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkru = eax;
- return pkru;
-}
-
-static inline unsigned int _rdpkru(int line)
-{
- unsigned int pkru = __rdpkru();
-
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
-
- return pkru;
-}
-
-#define rdpkru() _rdpkru(__LINE__)
-
-static inline void __wrpkru(unsigned int pkru)
-{
- unsigned int eax = pkru;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
-}
-
-static inline void wrpkru(unsigned int pkru)
-{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- /* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
-}
-
-/*
- * These are technically racy. since something could
- * change PKRU between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- unsigned int pkru = rdpkru();
- int bit = pkey * 2;
-
- if (do_allow)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- long pkru = rdpkru();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
-}
-
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
-
-int pkru_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKRU_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
-#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
deleted file mode 100644
index a360214..0000000
--- a/tools/testing/selftests/x86/protection_keys.c
+++ /dev/null
@@ -1,1508 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
- *
- * There are examples in here of:
- * * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
- * information from the siginfo
- *
- * Things to add:
- * make sure KSM and KSM COW breaking works
- * prefault pages in at malloc, or not
- * protect MPX bounds tables with protection keys?
- * make sure VMA splitting/merging is working correctly
- * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
- * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
- * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
- *
- * Compile like this:
- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- */
-#define _GNU_SOURCE
-#include <errno.h>
-#include <linux/futex.h>
-#include <time.h>
-#include <sys/time.h>
-#include <sys/syscall.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/ptrace.h>
-#include <setjmp.h>
-
-#include "pkey-helpers.h"
-
-int iteration_nr = 1;
-int test_nr;
-
-unsigned int shadow_pkru;
-
-#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-
-int dprint_in_signal;
-char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
-void cat_into_file(char *str, char *file)
-{
- int fd = open(file, O_RDWR);
- int ret;
-
- dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
- /*
- * these need to be raw because they are called under
- * pkey_assert()
- */
- if (fd < 0) {
- fprintf(stderr, "error opening '%s'\n", str);
- perror("error: ");
- exit(__LINE__);
- }
-
- ret = write(fd, str, strlen(str));
- if (ret != strlen(str)) {
- perror("write to file failed");
- fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
- exit(__LINE__);
- }
- close(fd);
-}
-
-#if CONTROL_TRACING > 0
-static int warned_tracing;
-int tracing_root_ok(void)
-{
- if (geteuid() != 0) {
- if (!warned_tracing)
- fprintf(stderr, "WARNING: not run as root, "
- "can not do tracing control\n");
- warned_tracing = 1;
- return 0;
- }
- return 1;
-}
-#endif
-
-void tracing_on(void)
-{
-#if CONTROL_TRACING > 0
-#define TRACEDIR "/sys/kernel/debug/tracing"
- char pidstr[32];
-
- if (!tracing_root_ok())
- return;
-
- sprintf(pidstr, "%d", getpid());
- cat_into_file("0", TRACEDIR "/tracing_on");
- cat_into_file("\n", TRACEDIR "/trace");
- if (1) {
- cat_into_file("function_graph", TRACEDIR "/current_tracer");
- cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
- } else {
- cat_into_file("nop", TRACEDIR "/current_tracer");
- }
- cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
- cat_into_file("1", TRACEDIR "/tracing_on");
- dprintf1("enabled tracing\n");
-#endif
-}
-
-void tracing_off(void)
-{
-#if CONTROL_TRACING > 0
- if (!tracing_root_ok())
- return;
- cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
-#endif
-}
-
-void abort_hooks(void)
-{
- fprintf(stderr, "running %s()...\n", __func__);
- tracing_off();
-#ifdef SLEEP_ON_ABORT
- sleep(SLEEP_ON_ABORT);
-#endif
-}
-
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
-/*
- * This attempts to have roughly a page of instructions followed by a few
- * instructions that do a write, and another page of instructions. That
- * way, we are pretty sure that the write is in the second page of
- * instructions and has at least a page of padding behind it.
- *
- * *That* lets us be sure to madvise() away the write instruction, which
- * will then fault, which makes sure that the fault code handles
- * execute-only memory properly.
- */
-__attribute__((__aligned__(PAGE_SIZE)))
-void lots_o_noops_around_write(int *write_to_me)
-{
- dprintf3("running %s()\n", __func__);
- __page_o_noops();
- /* Assume this happens in the second page of instructions: */
- *write_to_me = __LINE__;
- /* pad out by another page: */
- __page_o_noops();
- dprintf3("%s() done\n", __func__);
-}
-
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#ifdef __i386__
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
-
-#else
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
-
-#endif
-
-void dump_mem(void *dumpme, int len_bytes)
-{
- char *c = (void *)dumpme;
- int i;
-
- for (i = 0; i < len_bytes; i += sizeof(u64)) {
- u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
- }
-}
-
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-#ifndef SEGV_PKUERR
-# define SEGV_PKUERR 4
-#endif
-
-static char *si_code_str(int si_code)
-{
- if (si_code == SEGV_MAPERR)
- return "SEGV_MAPERR";
- if (si_code == SEGV_ACCERR)
- return "SEGV_ACCERR";
- if (si_code == SEGV_BNDERR)
- return "SEGV_BNDERR";
- if (si_code == SEGV_PKUERR)
- return "SEGV_PKUERR";
- return "UNKNOWN";
-}
-
-int pkru_faults;
-int last_si_pkey = -1;
-void signal_handler(int signum, siginfo_t *si, void *vucontext)
-{
- ucontext_t *uctxt = vucontext;
- int trapno;
- unsigned long ip;
- char *fpregs;
- u32 *pkru_ptr;
- u64 siginfo_pkey;
- u32 *si_pkey_ptr;
- int pkru_offset;
- fpregset_t fpregset;
-
- dprint_in_signal = 1;
- dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
-
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
-
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
-#ifdef __i386__
- /*
- * 32-bit has some extra padding so that userspace can tell whether
- * the XSTATE header is present in addition to the "legacy" FPU
- * state. We just assume that it is here.
- */
- fpregs += 0x70;
-#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
-
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
- /*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
- * here.
- */
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
- if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
-
- if ((si->si_code == SEGV_MAPERR) ||
- (si->si_code == SEGV_ACCERR) ||
- (si->si_code == SEGV_BNDERR)) {
- printf("non-PK si_code, exiting...\n");
- exit(4);
- }
-
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
- dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem((u8 *)si_pkey_ptr - 8, 24);
- siginfo_pkey = *si_pkey_ptr;
- pkey_assert(siginfo_pkey < NR_PKEYS);
- last_si_pkey = siginfo_pkey;
-
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
- dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
- dprintf1("<<<<==================================================\n");
- dprint_in_signal = 0;
-}
-
-int wait_all_children(void)
-{
- int status;
- return waitpid(-1, &status, 0);
-}
-
-void sig_chld(int x)
-{
- dprint_in_signal = 1;
- dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
- dprint_in_signal = 0;
-}
-
-void setup_sigsegv_handler(void)
-{
- int r, rs;
- struct sigaction newact;
- struct sigaction oldact;
-
- /* #PF is mapped to sigsegv */
- int signum = SIGSEGV;
-
- newact.sa_handler = 0;
- newact.sa_sigaction = signal_handler;
-
- /*sigset_t - signals to block while in the handler */
- /* get the old signal mask. */
- rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
- pkey_assert(rs == 0);
-
- /* call sa_sigaction, not sa_handler*/
- newact.sa_flags = SA_SIGINFO;
-
- newact.sa_restorer = 0; /* void(*)(), obsolete */
- r = sigaction(signum, &newact, &oldact);
- r = sigaction(SIGALRM, &newact, &oldact);
- pkey_assert(r == 0);
-}
-
-void setup_handlers(void)
-{
- signal(SIGCHLD, &sig_chld);
- setup_sigsegv_handler();
-}
-
-pid_t fork_lazy_child(void)
-{
- pid_t forkret;
-
- forkret = fork();
- pkey_assert(forkret >= 0);
- dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
-
- if (!forkret) {
- /* in the child */
- while (1) {
- dprintf1("child sleeping...\n");
- sleep(30);
- }
- }
- return forkret;
-}
-
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
-static u32 hw_pkey_get(int pkey, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
-
- dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
- __func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
-
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
- /*
- * shift down the relevant bits to the lowest two, then
- * mask off all the other high bits.
- */
- return masked_pkru;
-}
-
-static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
-
- /* make sure that 'rights' only contains the bits we expect: */
- assert(!(rights & ~mask));
-
- /* copy old pkru */
- new_pkru = old_pkru;
- /* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
- /* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
-
- __wrpkru(new_pkru);
-
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
- return 0;
-}
-
-void pkey_disable_set(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights;
- u32 orig_pkru = rdpkru();
-
- dprintf1("START->%s(%d, 0x%x)\n", __func__,
- pkey, flags);
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
- assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
-
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- pkey_assert(rdpkru() > orig_pkru);
- dprintf1("END<---%s(%d, 0x%x)\n", __func__,
- pkey, flags);
-}
-
-void pkey_disable_clear(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
-
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- assert(rdpkru() > orig_pkru);
-}
-
-void pkey_write_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_write_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_access_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
-}
-void pkey_access_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
-}
-
-int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int sret;
-
- dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
- ptr, size, orig_prot, pkey);
-
- errno = 0;
- sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
- if (errno) {
- dprintf2("SYS_mprotect_key sret: %d\n", sret);
- dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
- dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
- if (DEBUG_LEVEL >= 2)
- perror("SYS_mprotect_pkey");
- }
- return sret;
-}
-
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
-{
- int ret = syscall(SYS_pkey_alloc, flags, init_val);
- dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
- __func__, flags, init_val, ret, errno);
- return ret;
-}
-
-int alloc_pkey(void)
-{
- int ret;
- unsigned long init_val = 0x0;
-
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
- ret = sys_pkey_alloc(0, init_val);
- /*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
- */
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- if (ret) {
- /* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- /*
- * move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
- */
- shadow_pkru |= (init_val << (ret * 2));
- }
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
- /* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int sys_pkey_free(unsigned long pkey)
-{
- int ret = syscall(SYS_pkey_free, pkey);
- dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
- return ret;
-}
-
-/*
- * I had a bug where pkey bits could be set by mprotect() but
- * not cleared. This ensures we get lots of random bit sets
- * and clears on the vma and pte pkey bits.
- */
-int alloc_random_pkey(void)
-{
- int max_nr_pkey_allocs;
- int ret;
- int i;
- int alloced_pkeys[NR_PKEYS];
- int nr_alloced = 0;
- int random_index;
- memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
-
- /* allocate every possible key and make a note of which ones we got */
- max_nr_pkey_allocs = NR_PKEYS;
- for (i = 0; i < max_nr_pkey_allocs; i++) {
- int new_pkey = alloc_pkey();
- if (new_pkey < 0)
- break;
- alloced_pkeys[nr_alloced++] = new_pkey;
- }
-
- pkey_assert(nr_alloced > 0);
- /* select a random one out of the allocated ones */
- random_index = rand() % nr_alloced;
- ret = alloced_pkeys[random_index];
- /* now zero it out so we don't free it next */
- alloced_pkeys[random_index] = 0;
-
- /* go through the allocated ones that we did not want and free them */
- for (i = 0; i < nr_alloced; i++) {
- int free_ret;
- if (!alloced_pkeys[i])
- continue;
- free_ret = sys_pkey_free(alloced_pkeys[i]);
- pkey_assert(!free_ret);
- }
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int nr_iterations = random() % 100;
- int ret;
-
- while (0) {
- int rpkey = alloc_random_pkey();
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- if (nr_iterations-- < 0)
- break;
-
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- }
- pkey_assert(pkey < NR_PKEYS);
-
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-struct pkey_malloc_record {
- void *ptr;
- long size;
- int prot;
-};
-struct pkey_malloc_record *pkey_malloc_records;
-struct pkey_malloc_record *pkey_last_malloc_record;
-long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size, int prot)
-{
- long i;
- struct pkey_malloc_record *rec = NULL;
-
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- rec = &pkey_malloc_records[i];
- /* find a free record */
- if (rec)
- break;
- }
- if (!rec) {
- /* every record is full */
- size_t old_nr_records = nr_pkey_malloc_records;
- size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
- size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
- dprintf2("new_nr_records: %zd\n", new_nr_records);
- dprintf2("new_size: %zd\n", new_size);
- pkey_malloc_records = realloc(pkey_malloc_records, new_size);
- pkey_assert(pkey_malloc_records != NULL);
- rec = &pkey_malloc_records[nr_pkey_malloc_records];
- /*
- * realloc() does not initialize memory, so zero it from
- * the first new record all the way to the end.
- */
- for (i = 0; i < new_nr_records - old_nr_records; i++)
- memset(rec + i, 0, sizeof(*rec));
- }
- dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
- (int)(rec - pkey_malloc_records), rec, ptr, size);
- rec->ptr = ptr;
- rec->size = size;
- rec->prot = prot;
- pkey_last_malloc_record = rec;
- nr_pkey_malloc_records++;
-}
-
-void free_pkey_malloc(void *ptr)
-{
- long i;
- int ret;
- dprintf3("%s(%p)\n", __func__, ptr);
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- struct pkey_malloc_record *rec = &pkey_malloc_records[i];
- dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- if ((ptr < rec->ptr) ||
- (ptr >= rec->ptr + rec->size))
- continue;
-
- dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- nr_pkey_malloc_records--;
- ret = munmap(rec->ptr, rec->size);
- dprintf3("munmap ret: %d\n", ret);
- pkey_assert(!ret);
- dprintf3("clearing rec->ptr, rec: %p\n", rec);
- rec->ptr = NULL;
- dprintf3("done clearing rec->ptr, rec: %p\n", rec);
- return;
- }
- pkey_assert(false);
-}
-
-
-void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
-{
- void *ptr;
- int ret;
-
- rdpkru();
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
- pkey_assert(!ret);
- record_pkey_malloc(ptr, size, prot);
- rdpkru();
-
- dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
-{
- int ret;
- void *ptr;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- /*
- * Guarantee we can fit at least one huge page in the resulting
- * allocation by allocating space for 2:
- */
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- record_pkey_malloc(ptr, size, prot);
- mprotect_pkey(ptr, size, prot, pkey);
-
- dprintf1("unaligned ptr: %p\n", ptr);
- ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
- dprintf1(" aligned ptr: %p\n", ptr);
- ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
- dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
- ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
- dprintf1("MADV_WILLNEED ret: %d\n", ret);
- memset(ptr, 0, HPAGE_SIZE);
-
- dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-int hugetlb_setup_ok;
-#define GET_NR_HUGE_PAGES 10
-void setup_hugetlbfs(void)
-{
- int err;
- int fd;
- char buf[] = "123";
-
- if (geteuid() != 0) {
- fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
- return;
- }
-
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
-
- /*
- * Now go make sure that we got the pages and that they
- * are 2M pages. Someone might have made 1G the default.
- */
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
- if (fd < 0) {
- perror("opening sysfs 2M hugetlb config");
- return;
- }
-
- /* -1 to guarantee leaving the trailing \0 */
- err = read(fd, buf, sizeof(buf)-1);
- close(fd);
- if (err <= 0) {
- perror("reading sysfs 2M hugetlb config");
- return;
- }
-
- if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
- buf, GET_NR_HUGE_PAGES);
- return;
- }
-
- hugetlb_setup_ok = 1;
-}
-
-void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
-{
- void *ptr;
- int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
-
- if (!hugetlb_setup_ok)
- return PTR_ERR_ENOTSUP;
-
- dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
- pkey_assert(ptr != (void *)-1);
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
-{
- void *ptr;
- int fd;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- fd = open("/dax/foo", O_RDWR);
- pkey_assert(fd >= 0);
-
- ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
- pkey_assert(ptr != (void *)-1);
-
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
- close(fd);
- return ptr;
-}
-
-void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
-
- malloc_pkey_with_mprotect,
- malloc_pkey_anon_huge,
- malloc_pkey_hugetlb
-/* can not do direct with the pkey_mprotect() API:
- malloc_pkey_mmap_direct,
- malloc_pkey_mmap_dax,
-*/
-};
-
-void *malloc_pkey(long size, int prot, u16 pkey)
-{
- void *ret;
- static int malloc_type;
- int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
-
- pkey_assert(pkey < NR_PKEYS);
-
- while (1) {
- pkey_assert(malloc_type < nr_malloc_types);
-
- ret = pkey_malloc[malloc_type](size, prot, pkey);
- pkey_assert(ret != (void *)-1);
-
- malloc_type++;
- if (malloc_type >= nr_malloc_types)
- malloc_type = (random()%nr_malloc_types);
-
- /* try again if the malloc_type we tried is unsupported */
- if (ret == PTR_ERR_ENOTSUP)
- continue;
-
- break;
- }
-
- dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
- size, prot, pkey, ret);
- return ret;
-}
-
-int last_pkru_faults;
-#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
-{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
- dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
-
- /*
- * For exec-only memory, we do not know the pkey in
- * advance, so skip this check.
- */
- if (pkey != UNKNOWN_PKEY)
- pkey_assert(last_si_pkey == pkey);
-
- /*
- * The signal handler shold have cleared out PKRU to let the
- * test program continue. We now have to restore it.
- */
- if (__rdpkru() != 0)
- pkey_assert(0);
-
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
- last_si_pkey = -1;
-}
-
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
-} while (0)
-
-int test_fds[10] = { -1 };
-int nr_test_fds;
-void __save_test_fd(int fd)
-{
- pkey_assert(fd >= 0);
- pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
- test_fds[nr_test_fds] = fd;
- nr_test_fds++;
-}
-
-int get_test_read_fd(void)
-{
- int test_fd = open("/etc/passwd", O_RDONLY);
- __save_test_fd(test_fd);
- return test_fd;
-}
-
-void close_test_fds(void)
-{
- int i;
-
- for (i = 0; i < nr_test_fds; i++) {
- if (test_fds[i] < 0)
- continue;
- close(test_fds[i]);
- test_fds[i] = -1;
- }
- nr_test_fds = 0;
-}
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-__attribute__((noinline)) int read_ptr(int *ptr)
-{
- /*
- * Keep GCC from optimizing this away somehow
- */
- barrier();
- return *ptr;
-}
-
-void test_read_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling write access to PKEY[1], doing read\n");
- pkey_write_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- dprintf1("\n");
-}
-void test_read_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
- pkey_access_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
-}
-void test_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
- pkey_write_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
- pkey_access_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel read() to buffer\n", pkey);
- pkey_access_deny(pkey);
- ret = read(test_fd, ptr, 1);
- dprintf1("read ret: %d\n", ret);
- pkey_assert(ret);
-}
-void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- pkey_write_deny(pkey);
- ret = read(test_fd, ptr, 100);
- dprintf1("read ret: %d\n", ret);
- if (ret < 0 && (DEBUG_LEVEL > 0))
- perror("verbose read result (OK for this to be bad)");
- pkey_assert(ret);
-}
-
-void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int pipe_ret, vmsplice_ret;
- struct iovec iov;
- int pipe_fds[2];
-
- pipe_ret = pipe(pipe_fds);
-
- pkey_assert(pipe_ret == 0);
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel vmsplice from buffer\n", pkey);
- pkey_access_deny(pkey);
- iov.iov_base = ptr;
- iov.iov_len = PAGE_SIZE;
- vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
- dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
- pkey_assert(vmsplice_ret == -1);
-
- close(pipe_fds[0]);
- close(pipe_fds[1]);
-}
-
-void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
-{
- int ignored = 0xdada;
- int futex_ret;
- int some_int = __LINE__;
-
- dprintf1("disabling write to PKEY[%02d], "
- "doing futex gunk in buffer\n", pkey);
- *ptr = some_int;
- pkey_write_deny(pkey);
- futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
- &ignored, ignored);
- if (DEBUG_LEVEL > 0)
- perror("futex");
- dprintf1("futex() ret: %d\n", futex_ret);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
-{
- int err;
- int i;
-
- /* Note: 0 is the default pkey, so don't mess with it */
- for (i = 1; i < NR_PKEYS; i++) {
- if (pkey == i)
- continue;
-
- dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
- pkey_assert(err);
- }
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
-{
- int err;
- int bad_pkey = NR_PKEYS+99;
-
- /* pass a known-invalid pkey in: */
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
- pkey_assert(err);
-}
-
-void become_child(void)
-{
- pid_t forkret;
-
- forkret = fork();
- pkey_assert(forkret >= 0);
- dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
-
- if (!forkret) {
- /* in the child */
- return;
- }
- exit(0);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
-{
- int err;
- int allocated_pkeys[NR_PKEYS] = {0};
- int nr_allocated_pkeys = 0;
- int i;
-
- for (i = 0; i < NR_PKEYS*3; i++) {
- int new_pkey;
- dprintf1("%s() alloc loop: %d\n", __func__, i);
- new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
- dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
- if ((new_pkey == -1) && (errno == ENOSPC)) {
- dprintf2("%s() failed to allocate pkey after %d tries\n",
- __func__, nr_allocated_pkeys);
- } else {
- /*
- * Ensure the number of successes never
- * exceeds the number of keys supported
- * in the hardware.
- */
- pkey_assert(nr_allocated_pkeys < NR_PKEYS);
- allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
- }
-
- /*
- * Make sure that allocation state is properly
- * preserved across fork().
- */
- if (i == NR_PKEYS*2)
- become_child();
- }
-
- dprintf3("%s()::%d\n", __func__, __LINE__);
-
- /*
- * There are 16 pkeys supported in hardware. Three are
- * allocated by the time we get here:
- * 1. The default key (0)
- * 2. One possibly consumed by an execute-only mapping.
- * 3. One allocated by the test code and passed in via
- * 'pkey' to this function.
- * Ensure that we can allocate at least another 13 (16-3).
- */
- pkey_assert(i >= NR_PKEYS-3);
-
- for (i = 0; i < nr_allocated_pkeys; i++) {
- err = sys_pkey_free(allocated_pkeys[i]);
- pkey_assert(!err);
- rdpkru(); /* for shadow checking */
- }
-}
-
-/*
- * pkey 0 is special. It is allocated by default, so you do not
- * have to call pkey_alloc() to use it first. Make sure that it
- * is usable.
- */
-void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
-{
- long size;
- int prot;
-
- assert(pkey_last_malloc_record);
- size = pkey_last_malloc_record->size;
- /*
- * This is a bit of a hack. But mprotect() requires
- * huge-page-aligned sizes when operating on hugetlbfs.
- * So, make sure that we use something that's a multiple
- * of a huge page when we can.
- */
- if (size >= HPAGE_SIZE)
- size = HPAGE_SIZE;
- prot = pkey_last_malloc_record->prot;
-
- /* Use pkey 0 */
- mprotect_pkey(ptr, size, prot, 0);
-
- /* Make sure that we can set it back to the original pkey. */
- mprotect_pkey(ptr, size, prot, pkey);
-}
-
-void test_ptrace_of_child(int *ptr, u16 pkey)
-{
- __attribute__((__unused__)) int peek_result;
- pid_t child_pid;
- void *ignored = 0;
- long ret;
- int status;
- /*
- * This is the "control" for our little expermient. Make sure
- * we can always access it when ptracing.
- */
- int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
- int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
-
- /*
- * Fork a child which is an exact copy of this process, of course.
- * That means we can do all of our tests via ptrace() and then plain
- * memory access and ensure they work differently.
- */
- child_pid = fork_lazy_child();
- dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
-
- ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
- if (ret)
- perror("attach");
- dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
- pkey_assert(ret != -1);
- ret = waitpid(child_pid, &status, WUNTRACED);
- if ((ret != child_pid) || !(WIFSTOPPED(status))) {
- fprintf(stderr, "weird waitpid result %ld stat %x\n",
- ret, status);
- pkey_assert(0);
- }
- dprintf2("waitpid ret: %ld\n", ret);
- dprintf2("waitpid status: %d\n", status);
-
- pkey_access_deny(pkey);
- pkey_write_deny(pkey);
-
- /* Write access, untested for now:
- ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
- pkey_assert(ret != -1);
- dprintf1("poke at %p: %ld\n", peek_at, ret);
- */
-
- /*
- * Try to access the pkey-protected "ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect an exception: */
- peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
-
- /*
- * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect NO exception: */
- peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
-
- ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
- pkey_assert(ret != -1);
-
- ret = kill(child_pid, SIGKILL);
- pkey_assert(ret != -1);
-
- wait(&status);
-
- free(plain_ptr_unaligned);
-}
-
-void *get_pointer_to_instructions(void)
-{
- void *p1;
-
- p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
- dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
- /* lots_o_noops_around_write should be page-aligned already */
- assert(p1 == &lots_o_noops_around_write);
-
- /* Point 'p1' at the *second* page of the function: */
- p1 += PAGE_SIZE;
-
- /*
- * Try to ensure we fault this in on next touch to ensure
- * we get an instruction fault as opposed to a data one
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
-
- return p1;
-}
-
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
- pkey_assert(!ret);
- pkey_access_deny(pkey);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /*
- * Make sure this is an *instruction* fault
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
-}
-
-void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- dprintf1("%s() start\n", __func__);
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- /* Use a *normal* mprotect(), not mprotect_pkey(): */
- ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
- pkey_assert(!ret);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /* Make sure this is an *instruction* fault */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
-
- /*
- * Put the memory back to non-PROT_EXEC. Should clear the
- * exec-only pkey off the VMA and allow it to be readable
- * again. Go to PROT_NONE first to check for a kernel bug
- * that did not clear the pkey when doing PROT_NONE.
- */
- ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
- pkey_assert(!ret);
-
- ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
- pkey_assert(!ret);
- ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
-}
-
-void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
-{
- int size = PAGE_SIZE;
- int sret;
-
- if (cpu_has_pku()) {
- dprintf1("SKIP: %s: no CPU support\n", __func__);
- return;
- }
-
- sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
- pkey_assert(sret < 0);
-}
-
-void (*pkey_tests[])(int *ptr, u16 pkey) = {
- test_read_of_write_disabled_region,
- test_read_of_access_disabled_region,
- test_write_of_write_disabled_region,
- test_write_of_access_disabled_region,
- test_kernel_write_of_access_disabled_region,
- test_kernel_write_of_write_disabled_region,
- test_kernel_gup_of_access_disabled_region,
- test_kernel_gup_write_to_write_disabled_region,
- test_executing_on_unreadable_memory,
- test_implicit_mprotect_exec_only_memory,
- test_mprotect_with_pkey_0,
- test_ptrace_of_child,
- test_pkey_syscalls_on_non_allocated_pkey,
- test_pkey_syscalls_bad_args,
- test_pkey_alloc_exhaust,
-};
-
-void run_tests_once(void)
-{
- int *ptr;
- int prot = PROT_READ|PROT_WRITE;
-
- for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
- int pkey;
- int orig_pkru_faults = pkru_faults;
-
- dprintf1("======================\n");
- dprintf1("test %d preparing...\n", test_nr);
-
- tracing_on();
- pkey = alloc_random_pkey();
- dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
- ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
- dprintf1("test %d starting...\n", test_nr);
- pkey_tests[test_nr](ptr, pkey);
- dprintf1("freeing test memory: %p\n", ptr);
- free_pkey_malloc(ptr);
- sys_pkey_free(pkey);
-
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
-
- tracing_off();
- close_test_fds();
-
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
- dprintf1("======================\n\n");
- }
- iteration_nr++;
-}
-
-void pkey_setup_shadow(void)
-{
- shadow_pkru = __rdpkru();
-}
-
-int main(void)
-{
- int nr_iterations = 22;
-
- srand((unsigned int)time(NULL));
-
- setup_handlers();
-
- printf("has pku: %d\n", cpu_has_pku());
-
- if (!cpu_has_pku()) {
- int size = PAGE_SIZE;
- int *ptr;
-
- printf("running PKEY tests for unsupported CPU/OS\n");
-
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- assert(ptr != (void *)-1);
- test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
- exit(0);
- }
-
- pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
- setup_hugetlbfs();
-
- while (nr_iterations-- > 0)
- run_tests_once();
-
- printf("done (all tests OK)\n");
- return 0;
-}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 50ce6c3..120ac74 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -31,6 +31,8 @@
#include <sys/ptrace.h>
#include <sys/user.h>
+#include "helpers.h"
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -43,7 +45,19 @@
err(1, "sigaction");
}
-static volatile sig_atomic_t sig_traps;
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps, sig_eflags;
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
#ifdef __x86_64__
# define REG_IP REG_RIP
@@ -55,21 +69,6 @@
# define INT80_CLOBBERS
#endif
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
@@ -90,6 +89,25 @@
}
}
+static char const * const signames[] = { /* sparse table indexed by signal number; unlisted entries are NULL */
+ [SIGSEGV] = "SIGSEGV",
+ [SIGBUS] = "SIGBUS",
+ [SIGTRAP] = "SIGTRAP",
+ [SIGILL] = "SIGILL",
+};
+
+static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF);
+
+ sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL];
+ siglongjmp(jmpbuf, 1);
+}
+
static void check_result(void)
{
unsigned long new_eflags = get_eflags();
@@ -109,6 +127,22 @@
sig_traps = 0;
}
+static void fast_syscall_no_tf(void)
+{
+ sig_traps = 0;
+ printf("[RUN]\tFast syscall with TF cleared\n");
+ fflush(stdout); /* Force a syscall */
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is now set\n");
+ exit(1);
+ }
+ if (sig_traps) {
+ printf("[FAIL]\tGot SIGTRAP\n");
+ exit(1);
+ }
+ printf("[OK]\tNothing unexpected happened\n");
+}
+
int main()
{
#ifdef CAN_BUILD_32
@@ -163,17 +197,46 @@
check_result();
/* Now make sure that another fast syscall doesn't set TF again. */
- printf("[RUN]\tFast syscall with TF cleared\n");
- fflush(stdout); /* Force a syscall */
- if (get_eflags() & X86_EFLAGS_TF) {
- printf("[FAIL]\tTF is now set\n");
+ fast_syscall_no_tf();
+
+ /*
+ * And do a forced SYSENTER to make sure that this works even if
+ * fast syscalls don't use SYSENTER.
+ *
+ * Invoking SYSENTER directly breaks all the rules. Just handle
+ * the SIGSEGV.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ unsigned long nr = SYS_getpid;
+ printf("[RUN]\tSet TF and check SYSENTER\n");
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+ sethandler(SIGSEGV, print_and_longjmp,
+ SA_RESETHAND | SA_ONSTACK);
+ sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ /* Clear EBP first to make sure we segfault cleanly. */
+ asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+
+ /* We're unreachable here. SYSENTER forgets RIP. */
+ }
+ clearhandler(SIGSEGV);
+ clearhandler(SIGILL);
+ if (!(sig_eflags & X86_EFLAGS_TF)) {
+ printf("[FAIL]\tTF was cleared\n");
exit(1);
}
- if (sig_traps) {
- printf("[FAIL]\tGot SIGTRAP\n");
- exit(1);
- }
- printf("[OK]\tNothing unexpected happened\n");
+
+ /* Now make sure that another fast syscall doesn't set TF again. */
+ fast_syscall_no_tf();
return 0;
}
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index bc0ecc2..bff474b 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,30 +15,11 @@
#include <setjmp.h>
#include <errno.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
/* Our sigaltstack scratch space. */
static unsigned char altstack_data[SIGSTKSZ];
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -72,6 +53,7 @@
if (ax != -EFAULT && ax != -ENOSYS) {
printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
(unsigned long)ax);
+ printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
n_errs++;
} else {
printf("[OK]\tSeems okay\n");
@@ -226,5 +208,30 @@
}
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#ifdef __x86_64__
+ printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n");
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sigtrap_consecutive_syscalls = 0;
+
+ asm volatile ("wrgsbase %%rax\n\t"
+ :: "a" (0xffffffffffff0000UL));
+
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#endif
+
return 0;
}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index a765f62..a108b80 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -13,29 +13,11 @@
#include <signal.h>
#include <err.h>
#include <sys/syscall.h>
-#include <asm/processor-flags.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
static unsigned int nerrs;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -74,6 +56,12 @@
printf("[RUN]\tSet NT and issue a syscall\n");
do_it(X86_EFLAGS_NT);
+ printf("[RUN]\tSet AC and issue a syscall\n");
+ do_it(X86_EFLAGS_AC);
+
+ printf("[RUN]\tSet NT|AC and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC);
+
/*
* Now try it again with TF set -- TF forces returns via IRET in all
* cases except non-ptregs-using 64-bit full fast path syscalls.
@@ -81,8 +69,28 @@
sethandler(SIGTRAP, sigtrap, 0);
+ printf("[RUN]\tSet TF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF);
+
printf("[RUN]\tSet NT|TF and issue a syscall\n");
do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+ printf("[RUN]\tSet AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ printf("[RUN]\tSet NT|AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ /*
+ * Now try DF. This is evil and it's plausible that we will crash
+ * glibc, but glibc would have to do something rather surprising
+ * for this to happen.
+ */
+ printf("[RUN]\tSet DF and issue a syscall\n");
+ do_it(X86_EFLAGS_DF);
+
+ printf("[RUN]\tSet TF|DF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF | X86_EFLAGS_DF);
+
return nerrs == 0 ? 0 : 1;
}
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 35edd61..42052db 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -259,6 +259,11 @@
static void test_clock_gettime(void)
{
+ if (!vdso_clock_gettime) {
+ printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n");
+ return;
+ }
+
for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
clock++) {
test_one_clock_gettime(clock, clocknames[clock]);
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index a4f4d4c..5b45e69 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -20,6 +20,8 @@
#include <setjmp.h>
#include <sys/uio.h>
+#include "helpers.h"
+
#ifdef __x86_64__
# define VSYS(x) (x)
#else
@@ -460,6 +462,17 @@
return 0;
}
+/*
+ * Debuggers expect ptrace() to be able to peek at the vsyscall page.
+ * Use process_vm_readv() as a proxy for ptrace() to test this. We
+ * want it to work in the vsyscall=emulate case and to fail in the
+ * vsyscall=xonly case.
+ *
+ * It's worth noting that this ABI is a bit nutty. write(2) can't
+ * read from the vsyscall page on any kernel version or mode. The
+ * fact that ptrace() ever worked was a nice courtesy of old kernels,
+ * but the code to support it is fairly gross.
+ */
static int test_process_vm_readv(void)
{
#ifdef __x86_64__
@@ -475,17 +488,24 @@
remote.iov_len = 4096;
ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
if (ret != 4096) {
- printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
- return 0;
+ /*
+ * We expect process_vm_readv() to work if and only if the
+ * vsyscall page is readable.
+ */
+ printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
+ return vsyscall_map_r ? 1 : 0;
}
if (vsyscall_map_r) {
- if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
+ if (!memcmp(buf, remote.iov_base, sizeof(buf))) {
printf("[OK]\tIt worked and read correct data\n");
} else {
printf("[FAIL]\tIt worked but returned incorrect data\n");
return 1;
}
+ } else {
+ printf("[FAIL]\tprocess_vm_readv() succeeded, but it should have failed in this configuration\n");
+ return 1;
}
#endif
@@ -493,21 +513,8 @@
}
#ifdef __x86_64__
-#define X86_EFLAGS_TF (1UL << 8)
static volatile sig_atomic_t num_vsyscall_traps;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
-}
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 0075ccd..4c311e1 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -11,6 +11,8 @@
#include <features.h>
#include <stdio.h>
+#include "helpers.h"
+
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
int main()
@@ -53,27 +55,6 @@
err(1, "sigaction");
}
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
-
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static volatile sig_atomic_t nerrs;
static unsigned long sysinfo;
static bool got_sysinfo = false;
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
index 29a5c94..fe99f24 100644
--- a/tools/testing/selftests/x86/vdso_restorer.c
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -15,6 +15,7 @@
#include <err.h>
#include <stdio.h>
+#include <dlfcn.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
@@ -46,11 +47,23 @@
int nerrs = 0;
struct real_sigaction sa;
+ void *vdso = dlopen("linux-vdso.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[SKIP]\tFailed to find vDSO. Tests are not expected to work.\n");
+ return 0;
+ }
+
memset(&sa, 0, sizeof(sa));
sa.handler = handler_with_siginfo;
sa.flags = SA_SIGINFO;
sa.restorer = NULL; /* request kernel-provided restorer */
+ printf("[RUN]\tRaise a signal, SA_SIGINFO, sa.restorer == NULL\n");
+
if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
err(1, "raw rt_sigaction syscall");
@@ -63,6 +76,8 @@
nerrs++;
}
+ printf("[RUN]\tRaise a signal, !SA_SIGINFO, sa.restorer == NULL\n");
+
sa.flags = 0;
sa.handler = handler_without_siginfo;
if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)