Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 1865201..5d49bfe 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,12 +11,13 @@
 CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
-			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
-			protection_keys test_vdso test_vsyscall mov_ss_trap
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+			check_initial_reg_state sigreturn iopl ioperm \
+			protection_keys test_vdso test_vsyscall mov_ss_trap \
+			syscall_arg_fault
+TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 172d329..3e2089c 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
 # check_cc.sh - Helper to test userspace compilation support
 # Copyright (c) 2015 Andrew Lutomirski
-# GPL v2
 
 CC="$1"
 TESTPROG="$2"
diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c
index 6aaed9b..3bc95f3 100644
--- a/tools/testing/selftests/x86/check_initial_reg_state.c
+++ b/tools/testing/selftests/x86/check_initial_reg_state.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * check_initial_reg_state.c - check that execve sets the correct state
  * Copyright (c) 2014-2016 Andrew Lutomirski
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #define _GNU_SOURCE
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index ade443a..d1e919b 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * entry_from_vm86.c - tests kernel entries from vm86 mode
  * Copyright (c) 2014-2015 Andrew Lutomirski
  *
  * This exercises a few paths that need to special-case vm86 mode.
- *
- * GPL v2.
  */
 
 #define _GNU_SOURCE
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index f249e04..15a329d 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * fsgsbase.c, an fsgsbase test
  * Copyright (c) 2014-2016 Andy Lutomirski
- * GPL v2
  */
 
 #define _GNU_SOURCE
@@ -23,6 +23,10 @@
 #include <pthread.h>
 #include <asm/ldt.h>
 #include <sys/mman.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <setjmp.h>
 
 #ifndef __x86_64__
 # error This test is 64-bit only
@@ -31,6 +35,8 @@
 static volatile sig_atomic_t want_segv;
 static volatile unsigned long segv_addr;
 
+static unsigned short *shared_scratch;
+
 static int nerrs;
 
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
@@ -71,6 +77,43 @@
 
 }
 
+static jmp_buf jmpbuf;
+
+static void sigill(int sig, siginfo_t *si, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+static bool have_fsgsbase;
+
+static inline unsigned long rdgsbase(void)
+{
+	unsigned long gsbase;
+
+	asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
+
+	return gsbase;
+}
+
+static inline unsigned long rdfsbase(void)
+{
+	unsigned long fsbase;
+
+	asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
+
+	return fsbase;
+}
+
+static inline void wrgsbase(unsigned long gsbase)
+{
+	asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
+}
+
+static inline void wrfsbase(unsigned long fsbase)
+{
+	asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
+}
+
 enum which_base { FS, GS };
 
 static unsigned long read_base(enum which_base which)
@@ -199,16 +242,13 @@
 	       to_set, hard_zero ? " and clear gs" : "", sel);
 }
 
-void do_unexpected_base(void)
+static __thread int set_thread_area_entry_number = -1;
+
+static unsigned short load_gs(void)
 {
 	/*
-	 * The goal here is to try to arrange for GS == 0, GSBASE !=
-	 * 0, and for the the kernel the think that GSBASE == 0.
-	 *
-	 * To make the test as reliable as possible, this uses
-	 * explicit descriptorss.  (This is not the only way.  This
-	 * could use ARCH_SET_GS with a low, nonzero base, but the
-	 * relevant side effect of ARCH_SET_GS could change.)
+	 * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
+	 * that GSBASE == 0 (i.e. thread.gsbase == 0).
 	 */
 
 	/* Step 1: tell the kernel that we have GSBASE == 0. */
@@ -228,8 +268,9 @@
 		.useable         = 0
 	};
 	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
-		printf("\tother thread: using LDT slot 0\n");
+		printf("\tusing LDT slot 0\n");
 		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+		return 0x7;
 	} else {
 		/* No modify_ldt for us (configured out, perhaps) */
 
@@ -239,7 +280,7 @@
 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
 		memcpy(low_desc, &desc, sizeof(desc));
 
-		low_desc->entry_number = -1;
+		low_desc->entry_number = set_thread_area_entry_number;
 
 		/* 32-bit set_thread_area */
 		long ret;
@@ -251,18 +292,43 @@
 
 		if (ret != 0) {
 			printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
-			return;
+			return 0;
 		}
-		printf("\tother thread: using GDT slot %d\n", desc.entry_number);
-		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+		printf("\tusing GDT slot %d\n", desc.entry_number);
+		set_thread_area_entry_number = desc.entry_number;
+
+		unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
+		asm volatile ("mov %0, %%gs" : : "rm" (gs));
+		return gs;
 	}
+}
 
-	/*
-	 * Step 3: set the selector back to zero.  On AMD chips, this will
-	 * preserve GSBASE.
-	 */
+void test_wrbase(unsigned short index, unsigned long base)
+{
+	unsigned short newindex;
+	unsigned long newbase;
 
-	asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+	printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
+
+	asm volatile ("mov %0, %%gs" : : "rm" (index));
+	wrgsbase(base);
+
+	remote_base = 0;
+	ftx = 1;
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+	while (ftx != 0)
+		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+	asm volatile ("mov %%gs, %0" : "=rm" (newindex));
+	newbase = rdgsbase();
+
+	if (newindex == index && newbase == base) {
+		printf("[OK]\tIndex and base were preserved\n");
+	} else {
+		printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
+		       newindex, newbase);
+		nerrs++;
+	}
 }
 
 static void *threadproc(void *ctx)
@@ -273,12 +339,19 @@
 		if (ftx == 3)
 			return NULL;
 
-		if (ftx == 1)
+		if (ftx == 1) {
 			do_remote_base();
-		else if (ftx == 2)
-			do_unexpected_base();
-		else
+		} else if (ftx == 2) {
+			/*
+			 * On AMD chips, this causes GSBASE != 0, GS == 0, and
+			 * thread.gsbase == 0.
+			 */
+
+			load_gs();
+			asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+		} else {
 			errx(1, "helper thread got bad command");
+		}
 
 		ftx = 0;
 		syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
@@ -367,10 +440,85 @@
 	}
 }
 
+#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+
+static void test_ptrace_write_gsbase(void)
+{
+	int status;
+	pid_t child = fork();
+
+	if (child < 0)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
+
+		*shared_scratch = load_gs();
+
+		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		raise(SIGTRAP);
+		_exit(0);
+	}
+
+	wait(&status);
+
+	if (WSTOPSIG(status) == SIGTRAP) {
+		unsigned long gs, base;
+		unsigned long gs_offset = USER_REGS_OFFSET(gs);
+		unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+		gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
+
+		if (gs != *shared_scratch) {
+			nerrs++;
+			printf("[FAIL]\tGS is not prepared with nonzero\n");
+			goto END;
+		}
+
+		if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
+			err(1, "PTRACE_POKEUSER");
+
+		gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
+		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+		/*
+		 * In a non-FSGSBASE system, the nonzero selector will load
+		 * GSBASE (again). But what is tested here is whether the
+		 * selector value is changed or not by the GSBASE write in
+		 * a ptracer.
+		 */
+		if (gs == 0 && base == 0xFF) {
+			printf("[OK]\tGS was reset as expected\n");
+		} else {
+			nerrs++;
+			printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
+		}
+	}
+
+END:
+	ptrace(PTRACE_CONT, child, NULL, NULL);
+}
+
 int main()
 {
 	pthread_t thread;
 
+	shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+			      MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+
+	/* Probe FSGSBASE */
+	sethandler(SIGILL, sigill, 0);
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		rdfsbase();
+		have_fsgsbase = true;
+		printf("\tFSGSBASE instructions are enabled\n");
+	} else {
+		printf("\tFSGSBASE instructions are disabled\n");
+	}
+	clearhandler(SIGILL);
+
 	sethandler(SIGSEGV, sigsegv, 0);
 
 	check_gs_value(0);
@@ -417,11 +565,28 @@
 
 	test_unexpected_base();
 
+	if (have_fsgsbase) {
+		unsigned short ss;
+
+		asm volatile ("mov %%ss, %0" : "=rm" (ss));
+
+		test_wrbase(0, 0);
+		test_wrbase(0, 1);
+		test_wrbase(0, 0x200000000);
+		test_wrbase(0, 0xffffffffffffffff);
+		test_wrbase(ss, 0);
+		test_wrbase(ss, 1);
+		test_wrbase(ss, 0x200000000);
+		test_wrbase(ss, 0xffffffffffffffff);
+	}
+
 	ftx = 3;  /* Kill the thread. */
 	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
 
 	if (pthread_join(thread, NULL) != 0)
 		err(1, "pthread_join");
 
+	test_ptrace_write_gsbase();
+
 	return nerrs == 0 ? 0 : 1;
 }
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index 3c3a022..6da0ac3 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -257,7 +257,8 @@
 			err(1, "sigaltstack");
 		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
 		nr = SYS_getpid;
-		asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+		/* Clear EBP first to make sure we segfault cleanly. */
+		asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
 			      : [ss] "m" (ss) : "flags", "rcx"
 #ifdef __x86_64__
 				, "r11"
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
deleted file mode 100644
index 7546eba..0000000
--- a/tools/testing/selftests/x86/mpx-debug.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_DEBUG_H
-#define _MPX_DEBUG_H
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-#define dprintf5(args...) dprintf_level(5, args)
-
-#endif /* _MPX_DEBUG_H */
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
deleted file mode 100644
index c13607e..0000000
--- a/tools/testing/selftests/x86/mpx-dig.c
+++ /dev/null
@@ -1,499 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Written by Dave Hansen <dave.hansen@intel.com>
- */
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <fcntl.h>
-#include "mpx-debug.h"
-#include "mpx-mm.h"
-#include "mpx-hw.h"
-
-unsigned long bounds_dir_global;
-
-#define mpx_dig_abort()	__mpx_dig_abort(__FILE__, __func__, __LINE__)
-static void inline __mpx_dig_abort(const char *file, const char *func, int line)
-{
-	fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
-	printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
-	abort();
-}
-
-/*
- * run like this (BDIR finds the probably bounds directory):
- *
- *	BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
- *		| head -1 | awk -F- '{print $1}')";
- *	./mpx-dig $pid 0x$BDIR
- *
- * NOTE:
- *	assumes that the only 2097152-kb VMA is the bounds dir
- */
-
-long nr_incore(void *ptr, unsigned long size_bytes)
-{
-	int i;
-	long ret = 0;
-	long vec_len = size_bytes / PAGE_SIZE;
-	unsigned char *vec = malloc(vec_len);
-	int incore_ret;
-
-	if (!vec)
-		mpx_dig_abort();
-
-	incore_ret = mincore(ptr, size_bytes, vec);
-	if (incore_ret) {
-		printf("mincore ret: %d\n", incore_ret);
-		perror("mincore");
-		mpx_dig_abort();
-	}
-	for (i = 0; i < vec_len; i++)
-		ret += vec[i];
-	free(vec);
-	return ret;
-}
-
-int open_proc(int pid, char *file)
-{
-	static char buf[100];
-	int fd;
-
-	snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
-	fd = open(&buf[0], O_RDONLY);
-	if (fd < 0)
-		perror(buf);
-
-	return fd;
-}
-
-struct vaddr_range {
-	unsigned long start;
-	unsigned long end;
-};
-struct vaddr_range *ranges;
-int nr_ranges_allocated;
-int nr_ranges_populated;
-int last_range = -1;
-
-int __pid_load_vaddrs(int pid)
-{
-	int ret = 0;
-	int proc_maps_fd = open_proc(pid, "maps");
-	char linebuf[10000];
-	unsigned long start;
-	unsigned long end;
-	char rest[1000];
-	FILE *f = fdopen(proc_maps_fd, "r");
-
-	if (!f)
-		mpx_dig_abort();
-	nr_ranges_populated = 0;
-	while (!feof(f)) {
-		char *readret = fgets(linebuf, sizeof(linebuf), f);
-		int parsed;
-
-		if (readret == NULL) {
-			if (feof(f))
-				break;
-			mpx_dig_abort();
-		}
-
-		parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
-		if (parsed != 3)
-			mpx_dig_abort();
-
-		dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
-		if (nr_ranges_populated >= nr_ranges_allocated) {
-			ret = -E2BIG;
-			break;
-		}
-		ranges[nr_ranges_populated].start = start;
-		ranges[nr_ranges_populated].end = end;
-		nr_ranges_populated++;
-	}
-	last_range = -1;
-	fclose(f);
-	close(proc_maps_fd);
-	return ret;
-}
-
-int pid_load_vaddrs(int pid)
-{
-	int ret;
-
-	dprintf2("%s(%d)\n", __func__, pid);
-	if (!ranges) {
-		nr_ranges_allocated = 4;
-		ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
-		dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
-			 nr_ranges_allocated, ranges);
-		assert(ranges != NULL);
-	}
-	do {
-		ret = __pid_load_vaddrs(pid);
-		if (!ret)
-			break;
-		if (ret == -E2BIG) {
-			dprintf2("%s(%d) need to realloc\n", __func__, pid);
-			nr_ranges_allocated *= 2;
-			ranges = realloc(ranges,
-					nr_ranges_allocated * sizeof(ranges[0]));
-			dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
-					pid, nr_ranges_allocated, ranges);
-			assert(ranges != NULL);
-			dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
-		}
-	} while (1);
-
-	dprintf2("%s(%d) done\n", __func__, pid);
-
-	return ret;
-}
-
-static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
-{
-	if (vaddr < r->start)
-		return 0;
-	if (vaddr >= r->end)
-		return 0;
-	return 1;
-}
-
-static inline int vaddr_mapped_by_range(unsigned long vaddr)
-{
-	int i;
-
-	if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
-		return 1;
-
-	for (i = 0; i < nr_ranges_populated; i++) {
-		struct vaddr_range *r = &ranges[i];
-
-		if (vaddr_in_range(vaddr, r))
-			continue;
-		last_range = i;
-		return 1;
-	}
-	return 0;
-}
-
-const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
-
-void *read_bounds_table_into_buf(unsigned long table_vaddr)
-{
-#ifdef MPX_DIG_STANDALONE
-	static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
-	off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
-	if (seek_ret != table_vaddr)
-		mpx_dig_abort();
-
-	int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
-	if (read_ret != sizeof(bt_buf))
-		mpx_dig_abort();
-	return &bt_buf;
-#else
-	return (void *)table_vaddr;
-#endif
-}
-
-int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
-		unsigned long bde_vaddr)
-{
-	unsigned long offset_inside_bt;
-	int nr_entries = 0;
-	int do_abort = 0;
-	char *bt_buf;
-
-	dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
-			__func__, base_controlled_vaddr, bde_vaddr);
-
-	bt_buf = read_bounds_table_into_buf(table_vaddr);
-
-	dprintf4("%s() read done\n", __func__);
-
-	for (offset_inside_bt = 0;
-	     offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
-	     offset_inside_bt += bt_entry_size_bytes) {
-		unsigned long bt_entry_index;
-		unsigned long bt_entry_controls;
-		unsigned long this_bt_entry_for_vaddr;
-		unsigned long *bt_entry_buf;
-		int i;
-
-		dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
-			offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
-		bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
-		if (!bt_buf) {
-			printf("null bt_buf\n");
-			mpx_dig_abort();
-		}
-		if (!bt_entry_buf) {
-			printf("null bt_entry_buf\n");
-			mpx_dig_abort();
-		}
-		dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
-				bt_entry_buf);
-		if (!bt_entry_buf[0] &&
-		    !bt_entry_buf[1] &&
-		    !bt_entry_buf[2] &&
-		    !bt_entry_buf[3])
-			continue;
-
-		nr_entries++;
-
-		bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
-		bt_entry_controls = sizeof(void *);
-		this_bt_entry_for_vaddr =
-			base_controlled_vaddr + bt_entry_index*bt_entry_controls;
-		/*
-		 * We sign extend vaddr bits 48->63 which effectively
-		 * creates a hole in the virtual address space.
-		 * This calculation corrects for the hole.
-		 */
-		if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
-			this_bt_entry_for_vaddr |= 0xffff800000000000;
-
-		if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
-			printf("bt_entry_buf: %p\n", bt_entry_buf);
-			printf("there is a bte for %lx but no mapping\n",
-					this_bt_entry_for_vaddr);
-			printf("	  bde   vaddr: %016lx\n", bde_vaddr);
-			printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
-			printf("	  table_vaddr: %016lx\n", table_vaddr);
-			printf("	  entry vaddr: %016lx @ offset %lx\n",
-				table_vaddr + offset_inside_bt, offset_inside_bt);
-			do_abort = 1;
-			mpx_dig_abort();
-		}
-		if (DEBUG_LEVEL < 4)
-			continue;
-
-		printf("table entry[%lx]: ", offset_inside_bt);
-		for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
-			printf("0x%016lx ", bt_entry_buf[i]);
-		printf("\n");
-	}
-	if (do_abort)
-		mpx_dig_abort();
-	dprintf4("%s() done\n",  __func__);
-	return nr_entries;
-}
-
-int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
-		int *nr_populated_bdes)
-{
-	unsigned long i;
-	int total_entries = 0;
-
-	dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
-			len_bytes, bd_offset_bytes, buf + len_bytes);
-
-	for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
-		unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
-		unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
-		unsigned long bounds_dir_entry;
-		unsigned long bd_for_vaddr;
-		unsigned long bt_start;
-		unsigned long bt_tail;
-		int nr_entries;
-
-		dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
-				bounds_dir_entry_ptr);
-
-		bounds_dir_entry = *bounds_dir_entry_ptr;
-		if (!bounds_dir_entry) {
-			dprintf4("no bounds dir at index 0x%lx / 0x%lx "
-				 "start at offset:%lx %lx\n", bd_index, bd_index,
-					bd_offset_bytes, i);
-			continue;
-		}
-		dprintf3("found bounds_dir_entry: 0x%lx @ "
-			 "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
-					&buf[i]);
-		/* mask off the enable bit: */
-		bounds_dir_entry &= ~0x1;
-		(*nr_populated_bdes)++;
-		dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
-		dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
-
-		bt_start = bounds_dir_entry;
-		bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
-		if (!vaddr_mapped_by_range(bt_start)) {
-			printf("bounds directory 0x%lx points to nowhere\n",
-					bounds_dir_entry);
-			mpx_dig_abort();
-		}
-		if (!vaddr_mapped_by_range(bt_tail)) {
-			printf("bounds directory end 0x%lx points to nowhere\n",
-					bt_tail);
-			mpx_dig_abort();
-		}
-		/*
-		 * Each bounds directory entry controls 1MB of virtual address
-		 * space.  This variable is the virtual address in the process
-		 * of the beginning of the area controlled by this bounds_dir.
-		 */
-		bd_for_vaddr = bd_index * (1UL<<20);
-
-		nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
-				bounds_dir_global+bd_offset_bytes+i);
-		total_entries += nr_entries;
-		dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
-			 "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
-				bd_index, buf+i,
-				bounds_dir_entry, nr_entries, total_entries,
-				bd_for_vaddr, bd_for_vaddr + (1UL<<20));
-	}
-	dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
-			bd_offset_bytes);
-	return total_entries;
-}
-
-int proc_pid_mem_fd = -1;
-
-void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
-			   long buffer_size_bytes, void *buffer)
-{
-	unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
-	int read_ret;
-	off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
-
-	if (seek_ret != seekto)
-		mpx_dig_abort();
-
-	read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
-	/* there shouldn't practically be short reads of /proc/$pid/mem */
-	if (read_ret != buffer_size_bytes)
-		mpx_dig_abort();
-
-	return buffer;
-}
-void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
-			   long buffer_size_bytes, void *buffer)
-
-{
-	unsigned char vec[buffer_size_bytes / PAGE_SIZE];
-	char *dig_bounds_dir_ptr =
-		(void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
-	/*
-	 * use mincore() to quickly find the areas of the bounds directory
-	 * that have memory and thus will be worth scanning.
-	 */
-	int incore_ret;
-
-	int incore = 0;
-	int i;
-
-	dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
-
-	incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
-	if (incore_ret) {
-		printf("mincore ret: %d\n", incore_ret);
-		perror("mincore");
-		mpx_dig_abort();
-	}
-	for (i = 0; i < sizeof(vec); i++)
-		incore += vec[i];
-	dprintf4("%s() total incore: %d\n", __func__, incore);
-	if (!incore)
-		return NULL;
-	dprintf3("%s() total incore: %d\n", __func__, incore);
-	return dig_bounds_dir_ptr;
-}
-
-int inspect_pid(int pid)
-{
-	static int dig_nr;
-	long offset_inside_bounds_dir;
-	char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
-	char *dig_bounds_dir_ptr;
-	int total_entries = 0;
-	int nr_populated_bdes = 0;
-	int inspect_self;
-
-	if (getpid() == pid) {
-		dprintf4("inspecting self\n");
-		inspect_self = 1;
-	} else {
-		dprintf4("inspecting pid %d\n", pid);
-		mpx_dig_abort();
-	}
-
-	for (offset_inside_bounds_dir = 0;
-	     offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
-	     offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
-		static int bufs_skipped;
-		int this_entries;
-
-		if (inspect_self) {
-			dig_bounds_dir_ptr =
-				fill_bounds_dir_buf_self(offset_inside_bounds_dir,
-							 sizeof(bounds_dir_buf),
-							 &bounds_dir_buf[0]);
-		} else {
-			dig_bounds_dir_ptr =
-				fill_bounds_dir_buf_other(offset_inside_bounds_dir,
-							  sizeof(bounds_dir_buf),
-							  &bounds_dir_buf[0]);
-		}
-		if (!dig_bounds_dir_ptr) {
-			bufs_skipped++;
-			continue;
-		}
-		this_entries = search_bd_buf(dig_bounds_dir_ptr,
-					sizeof(bounds_dir_buf),
-					offset_inside_bounds_dir,
-					&nr_populated_bdes);
-		total_entries += this_entries;
-	}
-	printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
-			total_entries, nr_populated_bdes);
-	return total_entries + nr_populated_bdes;
-}
-
-#ifdef MPX_DIG_REMOTE
-int main(int argc, char **argv)
-{
-	int err;
-	char *c;
-	unsigned long bounds_dir_entry;
-	int pid;
-
-	printf("mpx-dig starting...\n");
-	err = sscanf(argv[1], "%d", &pid);
-	printf("parsing: '%s', err: %d\n", argv[1], err);
-	if (err != 1)
-		mpx_dig_abort();
-
-	err = sscanf(argv[2], "%lx", &bounds_dir_global);
-	printf("parsing: '%s': %d\n", argv[2], err);
-	if (err != 1)
-		mpx_dig_abort();
-
-	proc_pid_mem_fd = open_proc(pid, "mem");
-	if (proc_pid_mem_fd < 0)
-		mpx_dig_abort();
-
-	inspect_pid(pid);
-	return 0;
-}
-#endif
-
-long inspect_me(struct mpx_bounds_dir *bounds_dir)
-{
-	int pid = getpid();
-
-	pid_load_vaddrs(pid);
-	bounds_dir_global = (unsigned long)bounds_dir;
-	dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
-	return inspect_pid(pid);
-}
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
deleted file mode 100644
index d1b61ab..0000000
--- a/tools/testing/selftests/x86/mpx-hw.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_HW_H
-#define _MPX_HW_H
-
-#include <assert.h>
-
-/* Describe the MPX Hardware Layout in here */
-
-#define NR_MPX_BOUNDS_REGISTERS 4
-
-#ifdef __i386__
-
-#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES	16 /* 4 * 32-bits */
-#define MPX_BOUNDS_TABLE_SIZE_BYTES		(1ULL << 14) /* 16k */
-#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES		4
-#define MPX_BOUNDS_DIR_SIZE_BYTES		(1ULL << 22) /* 4MB */
-
-#define MPX_BOUNDS_TABLE_BOTTOM_BIT		2
-#define MPX_BOUNDS_TABLE_TOP_BIT		11
-#define MPX_BOUNDS_DIR_BOTTOM_BIT		12
-#define MPX_BOUNDS_DIR_TOP_BIT			31
-
-#else
-
-/*
- * Linear Address of "pointer" (LAp)
- *   0 ->  2: ignored
- *   3 -> 19: index in to bounds table
- *  20 -> 47: index in to bounds directory
- *  48 -> 63: ignored
- */
-
-#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES	32
-#define MPX_BOUNDS_TABLE_SIZE_BYTES		(1ULL << 22) /* 4MB */
-#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES		8
-#define MPX_BOUNDS_DIR_SIZE_BYTES		(1ULL << 31) /* 2GB */
-
-#define MPX_BOUNDS_TABLE_BOTTOM_BIT		3
-#define MPX_BOUNDS_TABLE_TOP_BIT		19
-#define MPX_BOUNDS_DIR_BOTTOM_BIT		20
-#define MPX_BOUNDS_DIR_TOP_BIT			47
-
-#endif
-
-#define MPX_BOUNDS_DIR_NR_ENTRIES	\
-	(MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
-#define MPX_BOUNDS_TABLE_NR_ENTRIES	\
-	(MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
-
-#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT	0x1
-
-struct mpx_bd_entry {
-	union {
-		char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
-		void *contents[0];
-	};
-} __attribute__((packed));
-
-struct mpx_bt_entry {
-	union {
-		char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
-		unsigned long contents[0];
-	};
-} __attribute__((packed));
-
-struct mpx_bounds_dir {
-	struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
-} __attribute__((packed));
-
-struct mpx_bounds_table {
-	struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
-} __attribute__((packed));
-
-static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
-{
-	int total_nr_bits = topbit - bottombit;
-	unsigned long mask = (1UL << total_nr_bits)-1;
-	return (val >> bottombit) & mask;
-}
-
-static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
-{
-	return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
-					      MPX_BOUNDS_TABLE_TOP_BIT);
-}
-
-static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
-{
-	return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
-					      MPX_BOUNDS_DIR_TOP_BIT);
-}
-
-static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
-		struct mpx_bounds_dir *bounds_dir)
-{
-	unsigned long index = __vaddr_bounds_directory_index(vaddr);
-	return &bounds_dir->entries[index];
-}
-
-static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
-{
-	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
-	return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
-}
-
-static inline struct mpx_bounds_table *
-__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
-{
-	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
-	assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
-	__bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
-	return (struct mpx_bounds_table *)__bd_entry;
-}
-
-static inline struct mpx_bt_entry *
-mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
-{
-	struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
-	struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
-	unsigned long index = __vaddr_bounds_table_index(vaddr);
-	return &bt->entries[index];
-}
-
-#endif /* _MPX_HW_H */
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
deleted file mode 100644
index 50f7e92..0000000
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ /dev/null
@@ -1,1616 +0,0 @@
-/*
- * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions)
- *
- * Written by:
- * "Ren, Qiaowei" <qiaowei.ren@intel.com>
- * "Wei, Gang" <gang.wei@intel.com>
- * "Hansen, Dave" <dave.hansen@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2.
- */
-
-/*
- * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
- *	       it works on 32-bit.
- */
-
-int inspect_every_this_many_mallocs = 100;
-int zap_all_every_this_many_mallocs = 1000;
-
-#define _GNU_SOURCE
-#define _LARGEFILE64_SOURCE
-
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include "mpx-hw.h"
-#include "mpx-debug.h"
-#include "mpx-mm.h"
-
-#ifndef __always_inline
-#define __always_inline inline __attribute__((always_inline)
-#endif
-
-#ifndef TEST_DURATION_SECS
-#define TEST_DURATION_SECS 3
-#endif
-
-void write_int_to(char *prefix, char *file, int int_to_write)
-{
-	char buf[100];
-	int fd = open(file, O_RDWR);
-	int len;
-	int ret;
-
-	assert(fd >= 0);
-	len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
-	assert(len >= 0);
-	assert(len < sizeof(buf));
-	ret = write(fd, buf, len);
-	assert(ret == len);
-	ret = close(fd);
-	assert(!ret);
-}
-
-void write_pid_to(char *prefix, char *file)
-{
-	write_int_to(prefix, file, getpid());
-}
-
-void trace_me(void)
-{
-/* tracing events dir */
-#define TED "/sys/kernel/debug/tracing/events/"
-/*
-	write_pid_to("common_pid=", TED "signal/filter");
-	write_pid_to("common_pid=", TED "exceptions/filter");
-	write_int_to("", TED "signal/enable", 1);
-	write_int_to("", TED "exceptions/enable", 1);
-*/
-	write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
-	write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
-}
-
-#define test_failed() __test_failed(__FILE__, __LINE__)
-static void __test_failed(char *f, int l)
-{
-	fprintf(stderr, "abort @ %s::%d\n", f, l);
-	abort();
-}
-
-/* Error Printf */
-#define eprintf(args...)	fprintf(stderr, args)
-
-#ifdef __i386__
-
-/* i386 directory size is 4MB */
-#define REG_IP_IDX	REG_EIP
-#define REX_PREFIX
-
-#define XSAVE_OFFSET_IN_FPMEM	sizeof(struct _libc_fpstate)
-
-/*
- * __cpuid() is from the Linux Kernel:
- */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-		unsigned int *ecx, unsigned int *edx)
-{
-	/* ecx is often an input as well as an output. */
-	asm volatile(
-		"push %%ebx;"
-		"cpuid;"
-		"mov %%ebx, %1;"
-		"pop %%ebx"
-		: "=a" (*eax),
-		  "=g" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (*eax), "2" (*ecx));
-}
-
-#else /* __i386__ */
-
-#define REG_IP_IDX	REG_RIP
-#define REX_PREFIX "0x48, "
-
-#define XSAVE_OFFSET_IN_FPMEM	0
-
-/*
- * __cpuid() is from the Linux Kernel:
- */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-		unsigned int *ecx, unsigned int *edx)
-{
-	/* ecx is often an input as well as an output. */
-	asm volatile(
-		"cpuid;"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (*eax), "2" (*ecx));
-}
-
-#endif /* !__i386__ */
-
-struct xsave_hdr_struct {
-	uint64_t xstate_bv;
-	uint64_t reserved1[2];
-	uint64_t reserved2[5];
-} __attribute__((packed));
-
-struct bndregs_struct {
-	uint64_t bndregs[8];
-} __attribute__((packed));
-
-struct bndcsr_struct {
-	uint64_t cfg_reg_u;
-	uint64_t status_reg;
-} __attribute__((packed));
-
-struct xsave_struct {
-	uint8_t fpu_sse[512];
-	struct xsave_hdr_struct xsave_hdr;
-	uint8_t ymm[256];
-	uint8_t lwp[128];
-	struct bndregs_struct bndregs;
-	struct bndcsr_struct bndcsr;
-} __attribute__((packed));
-
-uint8_t __attribute__((__aligned__(64))) buffer[4096];
-struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
-
-uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
-struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
-
-uint64_t num_bnd_chk;
-
-static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
-{
-	uint32_t lmask = mask;
-	uint32_t hmask = mask >> 32;
-
-	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
-		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
-		     :   "memory");
-}
-
-static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
-{
-	uint32_t lmask = mask;
-	uint32_t hmask = mask >> 32;
-	unsigned char *fx = _fx;
-
-	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
-		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
-		     :   "memory");
-}
-
-static inline uint64_t xgetbv(uint32_t index)
-{
-	uint32_t eax, edx;
-
-	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (index));
-	return eax + ((uint64_t)edx << 32);
-}
-
-static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
-{
-	memset(buffer, 0, sizeof(buffer));
-	memcpy(buffer,
-		(uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
-		sizeof(struct xsave_struct));
-
-	return xsave_buf->bndcsr.status_reg;
-}
-
-#include <pthread.h>
-
-static uint8_t *get_next_inst_ip(uint8_t *addr)
-{
-	uint8_t *ip = addr;
-	uint8_t sib;
-	uint8_t rm;
-	uint8_t mod;
-	uint8_t base;
-	uint8_t modrm;
-
-	/* determine the prefix. */
-	switch(*ip) {
-	case 0xf2:
-	case 0xf3:
-	case 0x66:
-		ip++;
-		break;
-	}
-
-	/* look for rex prefix */
-	if ((*ip & 0x40) == 0x40)
-		ip++;
-
-	/* Make sure we have a MPX instruction. */
-	if (*ip++ != 0x0f)
-		return addr;
-
-	/* Skip the op code byte. */
-	ip++;
-
-	/* Get the modrm byte. */
-	modrm = *ip++;
-
-	/* Break it down into parts. */
-	rm = modrm & 7;
-	mod = (modrm >> 6);
-
-	/* Init the parts of the address mode. */
-	base = 8;
-
-	/* Is it a mem mode? */
-	if (mod != 3) {
-		/* look for scaled indexed addressing */
-		if (rm == 4) {
-			/* SIB addressing */
-			sib = *ip++;
-			base = sib & 7;
-			switch (mod) {
-			case 0:
-				if (base == 5)
-					ip += 4;
-				break;
-
-			case 1:
-				ip++;
-				break;
-
-			case 2:
-				ip += 4;
-				break;
-			}
-
-		} else {
-			/* MODRM addressing */
-			switch (mod) {
-			case 0:
-				/* DISP32 addressing, no base */
-				if (rm == 5)
-					ip += 4;
-				break;
-
-			case 1:
-				ip++;
-				break;
-
-			case 2:
-				ip += 4;
-				break;
-			}
-		}
-	}
-	return ip;
-}
-
-#ifdef si_lower
-static inline void *__si_bounds_lower(siginfo_t *si)
-{
-	return si->si_lower;
-}
-
-static inline void *__si_bounds_upper(siginfo_t *si)
-{
-	return si->si_upper;
-}
-#else
-
-/*
- * This deals with old version of _sigfault in some distros:
- *
-
-old _sigfault:
-        struct {
-            void *si_addr;
-	} _sigfault;
-
-new _sigfault:
-	struct {
-		void __user *_addr;
-		int _trapno;
-		short _addr_lsb;
-		union {
-			struct {
-				void __user *_lower;
-				void __user *_upper;
-			} _addr_bnd;
-			__u32 _pkey;
-		};
-	} _sigfault;
- *
- */
-
-static inline void **__si_bounds_hack(siginfo_t *si)
-{
-	void *sigfault = &si->_sifields._sigfault;
-	void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
-	int *trapno = (int*)end_sigfault;
-	/* skip _trapno and _addr_lsb */
-	void **__si_lower = (void**)(trapno + 2);
-
-	return __si_lower;
-}
-
-static inline void *__si_bounds_lower(siginfo_t *si)
-{
-	return *__si_bounds_hack(si);
-}
-
-static inline void *__si_bounds_upper(siginfo_t *si)
-{
-	return *(__si_bounds_hack(si) + 1);
-}
-#endif
-
-static int br_count;
-static int expected_bnd_index = -1;
-uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
-unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
-
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR	3
-#endif
-
-/*
- * The kernel is supposed to provide some information about the bounds
- * exception in the siginfo.  It should match what we have in the bounds
- * registers that we are checking against.  Just check against the shadow copy
- * since it is easily available, and we also check that *it* matches the real
- * registers.
- */
-void check_siginfo_vs_shadow(siginfo_t* si)
-{
-	int siginfo_ok = 1;
-	void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
-	void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
-
-	if ((expected_bnd_index < 0) ||
-	    (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
-		fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
-			expected_bnd_index);
-		exit(6);
-	}
-	if (__si_bounds_lower(si) != shadow_lower)
-		siginfo_ok = 0;
-	if (__si_bounds_upper(si) != shadow_upper)
-		siginfo_ok = 0;
-
-	if (!siginfo_ok) {
-		fprintf(stderr, "ERROR: siginfo bounds do not match "
-			"shadow bounds for register %d\n", expected_bnd_index);
-		exit(7);
-	}
-}
-
-void handler(int signum, siginfo_t *si, void *vucontext)
-{
-	int i;
-	ucontext_t *uctxt = vucontext;
-	int trapno;
-	unsigned long ip;
-
-	dprintf1("entered signal handler\n");
-
-	trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
-	ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
-
-	if (trapno == 5) {
-		typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
-		uint64_t status = read_mpx_status_sig(uctxt);
-		uint64_t br_reason =  status & 0x3;
-
-		br_count++;
-		dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
-
-		dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
-				status, ip, br_reason);
-		dprintf2("si_signo: %d\n", si->si_signo);
-		dprintf2("  signum: %d\n", signum);
-		dprintf2("info->si_code == SEGV_BNDERR: %d\n",
-				(si->si_code == SEGV_BNDERR));
-		dprintf2("info->si_code: %d\n", si->si_code);
-		dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
-		dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
-
-		for (i = 0; i < 8; i++)
-			dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
-		switch (br_reason) {
-		case 0: /* traditional BR */
-			fprintf(stderr,
-				"Undefined status with bound exception:%jx\n",
-				 status);
-			exit(5);
-		case 1: /* #BR MPX bounds exception */
-			/* these are normal and we expect to see them */
-
-			check_siginfo_vs_shadow(si);
-
-			dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
-				status, (void *)ip, si->si_addr);
-			num_bnd_chk++;
-			uctxt->uc_mcontext.gregs[REG_IP_IDX] =
-				(greg_t)get_next_inst_ip((uint8_t *)ip);
-			break;
-		case 2:
-			fprintf(stderr, "#BR status == 2, missing bounds table,"
-					"kernel should have handled!!\n");
-			exit(4);
-			break;
-		default:
-			fprintf(stderr, "bound check error: status 0x%jx at %p\n",
-				status, (void *)ip);
-			num_bnd_chk++;
-			uctxt->uc_mcontext.gregs[REG_IP_IDX] =
-				(greg_t)get_next_inst_ip((uint8_t *)ip);
-			fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
-			exit(3);
-		}
-	} else if (trapno == 14) {
-		eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
-			trapno, ip);
-		eprintf("si_addr %p\n", si->si_addr);
-		eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
-		test_failed();
-	} else {
-		eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
-		eprintf("si_addr %p\n", si->si_addr);
-		eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
-		test_failed();
-	}
-}
-
-static inline void cpuid_count(unsigned int op, int count,
-			       unsigned int *eax, unsigned int *ebx,
-			       unsigned int *ecx, unsigned int *edx)
-{
-	*eax = op;
-	*ecx = count;
-	__cpuid(eax, ebx, ecx, edx);
-}
-
-#define XSTATE_CPUID	    0x0000000d
-
-/*
- * List of XSAVE features Linux knows about:
- */
-enum xfeature_bit {
-	XSTATE_BIT_FP,
-	XSTATE_BIT_SSE,
-	XSTATE_BIT_YMM,
-	XSTATE_BIT_BNDREGS,
-	XSTATE_BIT_BNDCSR,
-	XSTATE_BIT_OPMASK,
-	XSTATE_BIT_ZMM_Hi256,
-	XSTATE_BIT_Hi16_ZMM,
-
-	XFEATURES_NR_MAX,
-};
-
-#define XSTATE_FP	       (1 << XSTATE_BIT_FP)
-#define XSTATE_SSE	      (1 << XSTATE_BIT_SSE)
-#define XSTATE_YMM	      (1 << XSTATE_BIT_YMM)
-#define XSTATE_BNDREGS	  (1 << XSTATE_BIT_BNDREGS)
-#define XSTATE_BNDCSR	   (1 << XSTATE_BIT_BNDCSR)
-#define XSTATE_OPMASK	   (1 << XSTATE_BIT_OPMASK)
-#define XSTATE_ZMM_Hi256	(1 << XSTATE_BIT_ZMM_Hi256)
-#define XSTATE_Hi16_ZMM	 (1 << XSTATE_BIT_Hi16_ZMM)
-
-#define MPX_XSTATES		(XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
-
-bool one_bit(unsigned int x, int bit)
-{
-	return !!(x & (1<<bit));
-}
-
-void print_state_component(int state_bit_nr, char *name)
-{
-	unsigned int eax, ebx, ecx, edx;
-	unsigned int state_component_size;
-	unsigned int state_component_supervisor;
-	unsigned int state_component_user;
-	unsigned int state_component_aligned;
-
-	/* See SDM Section 13.2 */
-	cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
-	assert(eax || ebx || ecx);
-	state_component_size = eax;
-	state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
-	state_component_user = !one_bit(ecx, 0);
-	state_component_aligned = one_bit(ecx, 1);
-	printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
-		name,
-		state_component_size,	    state_component_user,
-		state_component_supervisor, state_component_aligned);
-
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
-#define XSAVE_FEATURE_BIT       (26)  /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define OSXSAVE_FEATURE_BIT     (27) /* XSAVE enabled in the OS */
-
-bool check_mpx_support(void)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
-
-	/* We can't do much without XSAVE, so just make these assert()'s */
-	if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
-		fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
-		exit(0);
-	}
-
-	if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
-		fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
-		exit(0);
-	}
-
-	/* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
-	/* Is this redundant with the feature bit checks? */
-	cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
-	if (eax < XSTATE_CPUID) {
-		fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
-				" can not run MPX tests\n");
-		exit(0);
-	}
-
-	printf("XSAVE is supported by HW & OS\n");
-
-	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-
-	printf("XSAVE processor supported state mask: 0x%x\n", eax);
-	printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
-
-	/* Make sure that the MPX states are enabled in in XCR0 */
-	if ((eax & MPX_XSTATES) != MPX_XSTATES) {
-		fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
-		exit(0);
-	}
-
-	/* Make sure the MPX states are supported by XSAVE* */
-	if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
-		fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, "
-				"can not run MPX tests\n");
-		exit(0);
-	}
-
-	print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
-	print_state_component(XSTATE_BIT_BNDCSR,  "BNDCSR");
-
-	return true;
-}
-
-void enable_mpx(void *l1base)
-{
-	/* enable point lookup */
-	memset(buffer, 0, sizeof(buffer));
-	xrstor_state(xsave_buf, 0x18);
-
-	xsave_buf->xsave_hdr.xstate_bv = 0x10;
-	xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
-	xsave_buf->bndcsr.status_reg = 0;
-
-	dprintf2("bf xrstor\n");
-	dprintf2("xsave cndcsr: status %jx, configu %jx\n",
-	       xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
-	xrstor_state(xsave_buf, 0x18);
-	dprintf2("after xrstor\n");
-
-	xsave_state_1(xsave_buf, 0x18);
-
-	dprintf1("xsave bndcsr: status %jx, configu %jx\n",
-	       xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
-}
-
-#include <sys/prctl.h>
-
-struct mpx_bounds_dir *bounds_dir_ptr;
-
-unsigned long __bd_incore(const char *func, int line)
-{
-	unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
-	return ret;
-}
-#define bd_incore() __bd_incore(__func__, __LINE__)
-
-void check_clear(void *ptr, unsigned long sz)
-{
-	unsigned long *i;
-
-	for (i = ptr; (void *)i < ptr + sz; i++) {
-		if (*i) {
-			dprintf1("%p is NOT clear at %p\n", ptr, i);
-			assert(0);
-		}
-	}
-	dprintf1("%p is clear for %lx\n", ptr, sz);
-}
-
-void check_clear_bd(void)
-{
-	check_clear(bounds_dir_ptr, 2UL << 30);
-}
-
-#define USE_MALLOC_FOR_BOUNDS_DIR 1
-bool process_specific_init(void)
-{
-	unsigned long size;
-	unsigned long *dir;
-	/* Guarantee we have the space to align it, add padding: */
-	unsigned long pad = getpagesize();
-
-	size = 2UL << 30; /* 2GB */
-	if (sizeof(unsigned long) == 4)
-		size = 4UL << 20; /* 4MB */
-	dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
-
-	if (USE_MALLOC_FOR_BOUNDS_DIR) {
-		unsigned long _dir;
-
-		dir = malloc(size + pad);
-		assert(dir);
-		_dir = (unsigned long)dir;
-		_dir += 0xfffUL;
-		_dir &= ~0xfffUL;
-		dir = (void *)_dir;
-	} else {
-		/*
-		 * This makes debugging easier because the address
-		 * calculations are simpler:
-		 */
-		dir = mmap((void *)0x200000000000, size + pad,
-				PROT_READ|PROT_WRITE,
-				MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-		if (dir == (void *)-1) {
-			perror("unable to allocate bounds directory");
-			abort();
-		}
-		check_clear(dir, size);
-	}
-	bounds_dir_ptr = (void *)dir;
-	madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
-	bd_incore();
-	dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
-			(char *)bounds_dir_ptr + size);
-	check_clear(dir, size);
-	enable_mpx(dir);
-	check_clear(dir, size);
-	if (prctl(43, 0, 0, 0, 0)) {
-		printf("no MPX support\n");
-		abort();
-		return false;
-	}
-	return true;
-}
-
-bool process_specific_finish(void)
-{
-	if (prctl(44)) {
-		printf("no MPX support\n");
-		return false;
-	}
-	return true;
-}
-
-void setup_handler()
-{
-	int r, rs;
-	struct sigaction newact;
-	struct sigaction oldact;
-
-	/* #BR is mapped to sigsegv */
-	int signum  = SIGSEGV;
-
-	newact.sa_handler = 0;   /* void(*)(int)*/
-	newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
-
-	/*sigset_t - signals to block while in the handler */
-	/* get the old signal mask. */
-	rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
-	assert(rs == 0);
-
-	/* call sa_sigaction, not sa_handler*/
-	newact.sa_flags = SA_SIGINFO;
-
-	newact.sa_restorer = 0;  /* void(*)(), obsolete */
-	r = sigaction(signum, &newact, &oldact);
-	assert(r == 0);
-}
-
-void mpx_prepare(void)
-{
-	dprintf2("%s()\n", __func__);
-	setup_handler();
-	process_specific_init();
-}
-
-void mpx_cleanup(void)
-{
-	printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
-	process_specific_finish();
-}
-
-/*-------------- the following is test case ---------------*/
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-
-uint64_t num_lower_brs;
-uint64_t num_upper_brs;
-
-#define MPX_CONFIG_OFFSET 1024
-#define MPX_BOUNDS_OFFSET 960
-#define MPX_HEADER_OFFSET 512
-#define MAX_ADDR_TESTED (1<<28)
-#define TEST_ROUNDS 100
-
-/*
-      0F 1A /r BNDLDX-Load
-      0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
-   66 0F 1A /r BNDMOV bnd1, bnd2/m128
-   66 0F 1B /r BNDMOV bnd1/m128, bnd2
-   F2 0F 1A /r BNDCU bnd, r/m64
-   F2 0F 1B /r BNDCN bnd, r/m64
-   F3 0F 1A /r BNDCL bnd, r/m64
-   F3 0F 1B /r BNDMK bnd, m64
-*/
-
-static __always_inline void xsave_state(void *_fx, uint64_t mask)
-{
-	uint32_t lmask = mask;
-	uint32_t hmask = mask >> 32;
-	unsigned char *fx = _fx;
-
-	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
-		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
-		     :   "memory");
-}
-
-static __always_inline void mpx_clear_bnd0(void)
-{
-	long size = 0;
-	void *ptr = NULL;
-	/* F3 0F 1B /r BNDMK bnd, m64			*/
-	/* f3 0f 1b 04 11    bndmk  (%rcx,%rdx,1),%bnd0	*/
-	asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
-		     : : "c" (ptr), "d" (size-1)
-		     :   "memory");
-}
-
-static __always_inline void mpx_make_bound_helper(unsigned long ptr,
-		unsigned long size)
-{
-	/* F3 0F 1B /r		BNDMK bnd, m64			*/
-	/* f3 0f 1b 04 11       bndmk  (%rcx,%rdx,1),%bnd0	*/
-	asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
-		     : : "c" (ptr), "d" (size-1)
-		     :   "memory");
-}
-
-static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
-{
-	/* F3 0F 1A /r	NDCL bnd, r/m64			*/
-	/* f3 0f 1a 01	bndcl  (%rcx),%bnd0		*/
-	asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
-		     : : "c" (ptr)
-		     :   "memory");
-}
-
-static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
-{
-	/* F2 0F 1A /r	BNDCU bnd, r/m64	*/
-	/* f2 0f 1a 01	bndcu  (%rcx),%bnd0	*/
-	asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
-		     : : "c" (ptr)
-		     :   "memory");
-}
-
-static __always_inline void mpx_movbndreg_helper()
-{
-	/* 66 0F 1B /r	BNDMOV bnd1/m128, bnd2	*/
-	/* 66 0f 1b c2	bndmov %bnd0,%bnd2	*/
-
-	asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
-}
-
-static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
-{
-	/* 66 0F 1B /r	BNDMOV bnd1/m128, bnd2	*/
-	/* 66 0f 1b 01	bndmov %bnd0,(%rcx)	*/
-	asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
-		     : : "c" (mem)
-		     :   "memory");
-}
-
-static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
-{
-	/* 66 0F 1A /r	BNDMOV bnd1, bnd2/m128	*/
-	/* 66 0f 1a 01	bndmov (%rcx),%bnd0	*/
-	asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
-		     : : "c" (mem)
-		     :   "memory");
-}
-
-static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
-		unsigned long ptr_val)
-{
-	/* 0F 1B /r	BNDSTX-Store Extended Bounds Using Address Translation	*/
-	/* 0f 1b 04 11	bndstx %bnd0,(%rcx,%rdx,1)				*/
-	asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
-		     : : "c" (ptr_addr), "d" (ptr_val)
-		     :   "memory");
-}
-
-static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
-		unsigned long ptr_val)
-{
-	/* 0F 1A /r	BNDLDX-Load			*/
-	/*/ 0f 1a 04 11	bndldx (%rcx,%rdx,1),%bnd0	*/
-	asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
-		     : : "c" (ptr_addr), "d" (ptr_val)
-		     :   "memory");
-}
-
-void __print_context(void *__print_xsave_buffer, int line)
-{
-	uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
-	uint64_t *cfg    = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
-
-	int i;
-	eprintf("%s()::%d\n", "print_context", line);
-	for (i = 0; i < 4; i++) {
-		eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
-		       (unsigned long)bounds[i*2],
-		       ~(unsigned long)bounds[i*2+1],
-			(unsigned long)bounds[i*2+1]);
-	}
-
-	eprintf("cpcfg: %jx  cpstatus: %jx\n", cfg[0], cfg[1]);
-}
-#define print_context(x) __print_context(x, __LINE__)
-#ifdef DEBUG
-#define dprint_context(x) print_context(x)
-#else
-#define dprint_context(x) do{}while(0)
-#endif
-
-void init()
-{
-	int i;
-
-	srand((unsigned int)time(NULL));
-
-	for (i = 0; i < 4; i++) {
-		shadow_plb[i][0] = 0;
-		shadow_plb[i][1] = ~(unsigned long)0;
-	}
-}
-
-long int __mpx_random(int line)
-{
-#ifdef NOT_SO_RANDOM
-	static long fake = 722122311;
-	fake += 563792075;
-	return fakse;
-#else
-	return random();
-#endif
-}
-#define mpx_random() __mpx_random(__LINE__)
-
-uint8_t *get_random_addr()
-{
-	uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
-	return (addr - (unsigned long)addr % sizeof(uint8_t *));
-}
-
-static inline bool compare_context(void *__xsave_buffer)
-{
-	uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
-
-	int i;
-	for (i = 0; i < 4; i++) {
-		dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
-		       i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
-		       i, (unsigned long)bounds[i*2],     ~(unsigned long)bounds[i*2+1]);
-		if ((shadow_plb[i][0] != bounds[i*2]) ||
-		    (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
-			eprintf("ERROR comparing shadow to real bound register %d\n", i);
-			eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
-			       (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
-			       (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
-			return false;
-		}
-	}
-
-	return true;
-}
-
-void mkbnd_shadow(uint8_t *ptr, int index, long offset)
-{
-	uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
-	uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
-	*lower = (unsigned long)ptr;
-	*upper = (unsigned long)ptr + offset - 1;
-}
-
-void check_lowerbound_shadow(uint8_t *ptr, int index)
-{
-	uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
-	if (*lower > (uint64_t)(unsigned long)ptr)
-		num_lower_brs++;
-	else
-		dprintf1("LowerBoundChk passed:%p\n", ptr);
-}
-
-void check_upperbound_shadow(uint8_t *ptr, int index)
-{
-	uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
-	if (upper < (uint64_t)(unsigned long)ptr)
-		num_upper_brs++;
-	else
-		dprintf1("UpperBoundChk passed:%p\n", ptr);
-}
-
-__always_inline void movbndreg_shadow(int src, int dest)
-{
-	shadow_plb[dest][0] = shadow_plb[src][0];
-	shadow_plb[dest][1] = shadow_plb[src][1];
-}
-
-__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
-{
-	unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
-	unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
-	*dest = *lower;
-	*(dest+1) = *upper;
-}
-
-__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
-{
-	unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
-	unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
-	*lower = *src;
-	*upper = *(src+1);
-}
-
-__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
-{
-	shadow_map[0] = (unsigned long)shadow_plb[index][0];
-	shadow_map[1] = (unsigned long)shadow_plb[index][1];
-	shadow_map[2] = (unsigned long)ptr_val;
-	dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
-			index, ptr, ptr_val, ptr_val);
-	/*ptr ignored */
-}
-
-void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
-{
-	uint64_t lower = shadow_map[0];
-	uint64_t upper = shadow_map[1];
-	uint8_t *value = (uint8_t *)shadow_map[2];
-
-	if (value != ptr_val) {
-		dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
-			 "because %p != %p\n", __func__, index, ptr,
-			 ptr_val, index, value, ptr_val);
-		shadow_plb[index][0] = 0;
-		shadow_plb[index][1] = ~(unsigned long)0;
-	} else {
-		shadow_plb[index][0] = lower;
-		shadow_plb[index][1] = upper;
-	}
-	/* ptr ignored */
-}
-
-static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
-{
-	mpx_make_bound_helper((unsigned long)ptr, 0x1800);
-}
-
-static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
-{
-	mkbnd_shadow(ptr, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
-{
-	/* these are hard-coded to check bnd0 */
-	expected_bnd_index = 0;
-	mpx_check_lowerbound_helper((unsigned long)(ptr-1));
-	mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
-	/* reset this since we do not expect any more bounds exceptions */
-	expected_bnd_index = -1;
-}
-
-static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
-{
-	check_lowerbound_shadow(ptr-1, 0);
-	check_upperbound_shadow(ptr+0x1800, 0);
-}
-
-static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
-{
-	mpx_make_bound_helper((unsigned long)ptr, 0x1800);
-	mpx_movbndreg_helper();
-	mpx_movbnd2mem_helper(buf);
-	mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
-}
-
-static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
-{
-	mkbnd_shadow(ptr, 0, 0x1800);
-	movbndreg_shadow(0, 2);
-	movbnd2mem_shadow(0, (unsigned long *)buf);
-	mkbnd_shadow(ptr+0x12, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
-{
-	mpx_movbnd_from_mem_helper(buf);
-}
-
-static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
-{
-	movbnd_from_mem_shadow((unsigned long *)buf, 0);
-}
-
-static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
-{
-	mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
-	mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
-}
-
-static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
-{
-	stdsc_shadow(0, buf, ptr);
-	mkbnd_shadow(ptr+0x12, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
-{
-	mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
-}
-
-static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
-{
-	lddsc_shadow(0, buf, ptr);
-}
-
-#define NR_MPX_TEST_FUNCTIONS 6
-
-/*
- * For compatibility reasons, MPX will clear the bounds registers
- * when you make function calls (among other things).  We have to
- * preserve the registers in between calls to the "helpers" since
- * they build on each other.
- *
- * Be very careful not to make any function calls inside the
- * helpers, or anywhere else beween the xrstor and xsave.
- */
-#define run_helper(helper_nr, buf, buf_shadow, ptr)	do {	\
-	xrstor_state(xsave_test_buf, flags);			\
-	mpx_test_helper##helper_nr(buf, ptr);			\
-	xsave_state(xsave_test_buf, flags);			\
-	mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr);	\
-} while (0)
-
-static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
-{
-	uint64_t flags = 0x18;
-
-	dprint_context(xsave_test_buf);
-	switch (nr) {
-	case 0:
-		run_helper(0, buf, buf_shadow, ptr);
-		break;
-	case 1:
-		run_helper(1, buf, buf_shadow, ptr);
-		break;
-	case 2:
-		run_helper(2, buf, buf_shadow, ptr);
-		break;
-	case 3:
-		run_helper(3, buf, buf_shadow, ptr);
-		break;
-	case 4:
-		run_helper(4, buf, buf_shadow, ptr);
-		break;
-	case 5:
-		run_helper(5, buf, buf_shadow, ptr);
-		break;
-	default:
-		test_failed();
-		break;
-	}
-	dprint_context(xsave_test_buf);
-}
-
-unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
-extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
-
-long cover_buf_with_bt_entries(void *buf, long buf_len)
-{
-	int i;
-	long nr_to_fill;
-	int ratio = 1000;
-	unsigned long buf_len_in_ptrs;
-
-	/* Fill about 1/100 of the space with bt entries */
-	nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
-
-	if (!nr_to_fill)
-		dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
-
-	/* Align the buffer to pointer size */
-	while (((unsigned long)buf) % sizeof(void *)) {
-		buf++;
-		buf_len--;
-	}
-	/* We are storing pointers, so make */
-	buf_len_in_ptrs = buf_len / sizeof(void *);
-
-	for (i = 0; i < nr_to_fill; i++) {
-		long index = (mpx_random() % buf_len_in_ptrs);
-		void *ptr = buf + index * sizeof(unsigned long);
-		unsigned long ptr_addr = (unsigned long)ptr;
-
-		/* ptr and size can be anything */
-		mpx_make_bound_helper((unsigned long)ptr, 8);
-
-		/*
-		 * take bnd0 and put it in to bounds tables "buf + index" is an
-		 * address inside the buffer where we are pretending that we
-		 * are going to put a pointer We do not, though because we will
-		 * never load entries from the table, so it doesn't matter.
-		 */
-		mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
-		dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
-				ptr_addr, buf);
-	}
-	return nr_to_fill;
-}
-
-unsigned long align_down(unsigned long alignme, unsigned long align_to)
-{
-	return alignme & ~(align_to-1);
-}
-
-unsigned long align_up(unsigned long alignme, unsigned long align_to)
-{
-	return (alignme + align_to - 1) & ~(align_to-1);
-}
-
-/*
- * Using 1MB alignment guarantees that each no allocation
- * will overlap with another's bounds tables.
- *
- * We have to cook our own allocator here.  malloc() can
- * mix other allocation with ours which means that even
- * if we free all of our allocations, there might still
- * be bounds tables for the *areas* since there is other
- * valid memory there.
- *
- * We also can't use malloc() because a free() of an area
- * might not free it back to the kernel.  We want it
- * completely unmapped an malloc() does not guarantee
- * that.
- */
-#ifdef __i386__
-long alignment = 4096;
-long sz_alignment = 4096;
-#else
-long alignment = 1 * MB;
-long sz_alignment = 1 * MB;
-#endif
-void *mpx_mini_alloc(unsigned long sz)
-{
-	unsigned long long tries = 0;
-	static void *last;
-	void *ptr;
-	void *try_at;
-
-	sz = align_up(sz, sz_alignment);
-
-	try_at = last + alignment;
-	while (1) {
-		ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
-				MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-		if (ptr == (void *)-1)
-			return NULL;
-		if (ptr == try_at)
-			break;
-
-		munmap(ptr, sz);
-		try_at += alignment;
-#ifdef __i386__
-		/*
-		 * This isn't quite correct for 32-bit binaries
-		 * on 64-bit kernels since they can use the
-		 * entire 32-bit address space, but it's close
-		 * enough.
-		 */
-		if (try_at > (void *)0xC0000000)
-#else
-		if (try_at > (void *)0x0000800000000000)
-#endif
-			try_at = (void *)0x0;
-		if (!(++tries % 10000))
-			dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
-		continue;
-	}
-	last = ptr;
-	dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
-	return ptr;
-}
-void mpx_mini_free(void *ptr, long sz)
-{
-	dprintf2("%s() ptr: %p\n", __func__, ptr);
-	if ((unsigned long)ptr > 0x100000000000) {
-		dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
-		test_failed();
-	}
-	sz = align_up(sz, sz_alignment);
-	dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
-	munmap(ptr, sz);
-	dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
-}
-
-#define NR_MALLOCS 100
-struct one_malloc {
-	char *ptr;
-	int nr_filled_btes;
-	unsigned long size;
-};
-struct one_malloc mallocs[NR_MALLOCS];
-
-void free_one_malloc(int index)
-{
-	unsigned long free_ptr;
-	unsigned long mask;
-
-	if (!mallocs[index].ptr)
-		return;
-
-	mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
-	dprintf4("freed[%d]:  %p\n", index, mallocs[index].ptr);
-
-	free_ptr = (unsigned long)mallocs[index].ptr;
-	mask = alignment-1;
-	dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
-			(free_ptr & mask), mask);
-	assert((free_ptr & mask) == 0);
-
-	mallocs[index].ptr = NULL;
-}
-
-#ifdef __i386__
-#define MPX_BOUNDS_TABLE_COVERS 4096
-#else
-#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
-#endif
-void zap_everything(void)
-{
-	long after_zap;
-	long before_zap;
-	int i;
-
-	before_zap = inspect_me(bounds_dir_ptr);
-	dprintf1("zapping everything start: %ld\n", before_zap);
-	for (i = 0; i < NR_MALLOCS; i++)
-		free_one_malloc(i);
-
-	after_zap = inspect_me(bounds_dir_ptr);
-	dprintf1("zapping everything done: %ld\n", after_zap);
-	/*
-	 * We only guarantee to empty the thing out if our allocations are
-	 * exactly aligned on the boundaries of a boudns table.
-	 */
-	if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
-	    (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
-		if (after_zap != 0)
-			test_failed();
-
-		assert(after_zap == 0);
-	}
-}
-
-void do_one_malloc(void)
-{
-	static int malloc_counter;
-	long sz;
-	int rand_index = (mpx_random() % NR_MALLOCS);
-	void *ptr = mallocs[rand_index].ptr;
-
-	dprintf3("%s() enter\n", __func__);
-
-	if (ptr) {
-		dprintf3("freeing one malloc at index: %d\n", rand_index);
-		free_one_malloc(rand_index);
-		if (mpx_random() % (NR_MALLOCS*3) == 3) {
-			int i;
-			dprintf3("zapping some more\n");
-			for (i = rand_index; i < NR_MALLOCS; i++)
-				free_one_malloc(i);
-		}
-		if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
-			zap_everything();
-	}
-
-	/* 1->~1M */
-	sz = (1 + mpx_random() % 1000) * 1000;
-	ptr = mpx_mini_alloc(sz);
-	if (!ptr) {
-		/*
-		 * If we are failing allocations, just assume we
-		 * are out of memory and zap everything.
-		 */
-		dprintf3("zapping everything because out of memory\n");
-		zap_everything();
-		goto out;
-	}
-
-	dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
-	mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
-	mallocs[rand_index].ptr = ptr;
-	mallocs[rand_index].size = sz;
-out:
-	if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
-		inspect_me(bounds_dir_ptr);
-}
-
-void run_timed_test(void (*test_func)(void))
-{
-	int done = 0;
-	long iteration = 0;
-	static time_t last_print;
-	time_t now;
-	time_t start;
-
-	time(&start);
-	while (!done) {
-		time(&now);
-		if ((now - start) > TEST_DURATION_SECS)
-			done = 1;
-
-		test_func();
-		iteration++;
-
-		if ((now - last_print > 1) || done) {
-			printf("iteration %ld complete, OK so far\n", iteration);
-			last_print = now;
-		}
-	}
-}
-
-void check_bounds_table_frees(void)
-{
-	printf("executing unmaptest\n");
-	inspect_me(bounds_dir_ptr);
-	run_timed_test(&do_one_malloc);
-	printf("done with malloc() fun\n");
-}
-
-void insn_test_failed(int test_nr, int test_round, void *buf,
-		void *buf_shadow, void *ptr)
-{
-	print_context(xsave_test_buf);
-	eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
-	while (test_nr == 5) {
-		struct mpx_bt_entry *bte;
-		struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
-		struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
-
-		printf("  bd: %p\n", bd);
-		printf("&bde: %p\n", bde);
-		printf("*bde: %lx\n", *(unsigned long *)bde);
-		if (!bd_entry_valid(bde))
-			break;
-
-		bte = mpx_vaddr_to_bt_entry(buf, bd);
-		printf(" te: %p\n", bte);
-		printf("bte[0]: %lx\n", bte->contents[0]);
-		printf("bte[1]: %lx\n", bte->contents[1]);
-		printf("bte[2]: %lx\n", bte->contents[2]);
-		printf("bte[3]: %lx\n", bte->contents[3]);
-		break;
-	}
-	test_failed();
-}
-
-void check_mpx_insns_and_tables(void)
-{
-	int successes = 0;
-	int failures  = 0;
-	int buf_size = (1024*1024);
-	unsigned long *buf = malloc(buf_size);
-	const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
-	int i, j;
-
-	memset(buf, 0, buf_size);
-	memset(buf_shadow, 0, sizeof(buf_shadow));
-
-	for (i = 0; i < TEST_ROUNDS; i++) {
-		uint8_t *ptr = get_random_addr() + 8;
-
-		for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
-			if (0 && j != 5) {
-				successes++;
-				continue;
-			}
-			dprintf2("starting test %d round %d\n", j, i);
-			dprint_context(xsave_test_buf);
-			/*
-			 * test5 loads an address from the bounds tables.
-			 * The load will only complete if 'ptr' matches
-			 * the load and the store, so with random addrs,
-			 * the odds of this are very small.  Make it
-			 * higher by only moving 'ptr' 1/10 times.
-			 */
-			if (random() % 10 <= 0)
-				ptr = get_random_addr() + 8;
-			dprintf3("random ptr{%p}\n", ptr);
-			dprint_context(xsave_test_buf);
-			run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
-			dprint_context(xsave_test_buf);
-			if (!compare_context(xsave_test_buf)) {
-				insn_test_failed(j, i, buf, buf_shadow, ptr);
-				failures++;
-				goto exit;
-			}
-			successes++;
-			dprint_context(xsave_test_buf);
-			dprintf2("finished test %d round %d\n", j, i);
-			dprintf3("\n");
-			dprint_context(xsave_test_buf);
-		}
-	}
-
-exit:
-	dprintf2("\nabout to free:\n");
-	free(buf);
-	dprintf1("successes: %d\n", successes);
-	dprintf1(" failures: %d\n", failures);
-	dprintf1("    tests: %d\n", total_nr_tests);
-	dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
-	dprintf1("      saw: %d #BRs\n", br_count);
-	if (failures) {
-		eprintf("ERROR: non-zero number of failures\n");
-		exit(20);
-	}
-	if (successes != total_nr_tests) {
-		eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n",
-				successes, total_nr_tests);
-		exit(21);
-	}
-	if (num_upper_brs + num_lower_brs != br_count) {
-		eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
-				num_upper_brs, num_lower_brs, br_count);
-		eprintf("successes: %d\n", successes);
-		eprintf(" failures: %d\n", failures);
-		eprintf("    tests: %d\n", total_nr_tests);
-		eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
-		eprintf("      saw: %d #BRs\n", br_count);
-		exit(22);
-	}
-}
-
-/*
- * This is supposed to SIGSEGV nicely once the kernel
- * can no longer allocate vaddr space.
- */
-void exhaust_vaddr_space(void)
-{
-	unsigned long ptr;
-	/* Try to make sure there is no room for a bounds table anywhere */
-	unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
-#ifdef __i386__
-	unsigned long max_vaddr = 0xf7788000UL;
-#else
-	unsigned long max_vaddr = 0x800000000000UL;
-#endif
-
-	dprintf1("%s() start\n", __func__);
-	/* do not start at 0, we aren't allowed to map there */
-	for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
-		void *ptr_ret;
-		int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
-
-		if (!ret) {
-			dprintf1("madvise() %lx ret: %d\n", ptr, ret);
-			continue;
-		}
-		ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
-				MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-		if (ptr_ret != (void *)ptr) {
-			perror("mmap");
-			dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
-			break;
-		}
-		if (!(ptr & 0xffffff))
-			dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
-	}
-	for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
-		dprintf2("covering 0x%lx with bounds table entries\n", ptr);
-		cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
-	}
-	dprintf1("%s() end\n", __func__);
-	printf("done with vaddr space fun\n");
-}
-
-void mpx_table_test(void)
-{
-	printf("starting mpx bounds table test\n");
-	run_timed_test(check_mpx_insns_and_tables);
-	printf("done with mpx bounds table test\n");
-}
-
-int main(int argc, char **argv)
-{
-	int unmaptest = 0;
-	int vaddrexhaust = 0;
-	int tabletest = 0;
-	int i;
-
-	check_mpx_support();
-	mpx_prepare();
-	srandom(11179);
-
-	bd_incore();
-	init();
-	bd_incore();
-
-	trace_me();
-
-	xsave_state((void *)xsave_test_buf, 0x1f);
-	if (!compare_context(xsave_test_buf))
-		printf("Init failed\n");
-
-	for (i = 1; i < argc; i++) {
-		if (!strcmp(argv[i], "unmaptest"))
-			unmaptest = 1;
-		if (!strcmp(argv[i], "vaddrexhaust"))
-			vaddrexhaust = 1;
-		if (!strcmp(argv[i], "tabletest"))
-			tabletest = 1;
-	}
-	if (!(unmaptest || vaddrexhaust || tabletest)) {
-		unmaptest = 1;
-		/* vaddrexhaust = 1; */
-		tabletest = 1;
-	}
-	if (unmaptest)
-		check_bounds_table_frees();
-	if (tabletest)
-		mpx_table_test();
-	if (vaddrexhaust)
-		exhaust_vaddr_space();
-	printf("%s completed successfully\n", argv[0]);
-	exit(0);
-}
-
-#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
deleted file mode 100644
index 6dbdd66..0000000
--- a/tools/testing/selftests/x86/mpx-mm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_MM_H
-#define _MPX_MM_H
-
-#define PAGE_SIZE 4096
-#define MB (1UL<<20)
-
-extern long nr_incore(void *ptr, unsigned long size_bytes);
-
-#endif /* _MPX_MM_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 460b4bd..480995b 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
  *
  * There are examples in here of:
  *  * how to set protection keys on memory
@@ -1133,6 +1133,21 @@
 	pkey_assert(err);
 }
 
+void become_child(void)
+{
+	pid_t forkret;
+
+	forkret = fork();
+	pkey_assert(forkret >= 0);
+	dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+	if (!forkret) {
+		/* in the child */
+		return;
+	}
+	exit(0);
+}
+
 /* Assumes that all pkeys other than 'pkey' are unallocated */
 void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 {
@@ -1141,7 +1156,7 @@
 	int nr_allocated_pkeys = 0;
 	int i;
 
-	for (i = 0; i < NR_PKEYS*2; i++) {
+	for (i = 0; i < NR_PKEYS*3; i++) {
 		int new_pkey;
 		dprintf1("%s() alloc loop: %d\n", __func__, i);
 		new_pkey = alloc_pkey();
@@ -1152,21 +1167,27 @@
 		if ((new_pkey == -1) && (errno == ENOSPC)) {
 			dprintf2("%s() failed to allocate pkey after %d tries\n",
 				__func__, nr_allocated_pkeys);
-			break;
+		} else {
+			/*
+			 * Ensure the number of successes never
+			 * exceeds the number of keys supported
+			 * in the hardware.
+			 */
+			pkey_assert(nr_allocated_pkeys < NR_PKEYS);
+			allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
 		}
-		pkey_assert(nr_allocated_pkeys < NR_PKEYS);
-		allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+
+		/*
+		 * Make sure that allocation state is properly
+		 * preserved across fork().
+		 */
+		if (i == NR_PKEYS*2)
+			become_child();
 	}
 
 	dprintf3("%s()::%d\n", __func__, __LINE__);
 
 	/*
-	 * ensure it did not reach the end of the loop without
-	 * failure:
-	 */
-	pkey_assert(i < NR_PKEYS*2);
-
-	/*
 	 * There are 16 pkeys supported in hardware.  Three are
 	 * allocated by the time we get here:
 	 *   1. The default key (0)
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 4d9dc3f..57c4f67 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
  * Copyright (c) 2014-2015 Andrew Lutomirski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * This is a series of tests that exercises the sigreturn(2) syscall and
  * the IRET / SYSRET paths in the kernel.
  *
@@ -459,6 +451,19 @@
 	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
 	ctx->uc_mcontext.gregs[REG_CX] = 0;
 
+#ifdef __i386__
+	/*
+	 * Make sure the kernel doesn't inadvertently use DS or ES-relative
+	 * accesses in a region where user DS or ES is loaded.
+	 *
+	 * Skip this for 64-bit builds because long mode doesn't care about
+	 * DS and ES and skipping it increases test coverage a little bit,
+	 * since 64-bit kernels can still run the 32-bit build.
+	 */
+	ctx->uc_mcontext.gregs[REG_DS] = 0;
+	ctx->uc_mcontext.gregs[REG_ES] = 0;
+#endif
+
 	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 	requested_regs[REG_CX] = *ssptr(ctx);	/* The asm code does this. */
 
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index ddfdd63..50ce6c3 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * single_step_syscall.c - single-steps various x86 syscalls
  * Copyright (c) 2014-2015 Andrew Lutomirski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * This is a very simple series of tests that makes system calls with
  * the TF flag set.  This exercises some nasty kernel code in the
  * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 7db4fc9..bc0ecc2 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
  * Copyright (c) 2015 Andrew Lutomirski
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #define _GNU_SOURCE
@@ -23,9 +15,30 @@
 #include <setjmp.h>
 #include <errno.h>
 
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
 /* Our sigaltstack scratch space. */
 static unsigned char altstack_data[SIGSTKSZ];
 
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+		      : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -43,13 +56,22 @@
 
 static volatile sig_atomic_t n_errs;
 
-static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+#ifdef __x86_64__
+#define REG_AX REG_RAX
+#define REG_IP REG_RIP
+#else
+#define REG_AX REG_EAX
+#define REG_IP REG_EIP
+#endif
+
+static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
 
-	if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
-		printf("[FAIL]\tAX had the wrong value: 0x%x\n",
-		       ctx->uc_mcontext.gregs[REG_EAX]);
+	if (ax != -EFAULT && ax != -ENOSYS) {
+		printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
+		       (unsigned long)ax);
 		n_errs++;
 	} else {
 		printf("[OK]\tSeems okay\n");
@@ -58,9 +80,42 @@
 	siglongjmp(jmpbuf, 1);
 }
 
+static volatile sig_atomic_t sigtrap_consecutive_syscalls;
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	/*
+	 * KVM has some bugs that can cause us to stop making progress.
+	 * detect them and complain, but don't infinite loop or fail the
+	 * test.
+	 */
+
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+	if (*ip == 0x340f || *ip == 0x050f) {
+		/* The trap was on SYSCALL or SYSENTER */
+		sigtrap_consecutive_syscalls++;
+		if (sigtrap_consecutive_syscalls > 3) {
+			printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
+			siglongjmp(jmpbuf, 1);
+		}
+	} else {
+		sigtrap_consecutive_syscalls = 0;
+	}
+}
+
 static void sigill(int sig, siginfo_t *info, void *ctx_void)
 {
-	printf("[SKIP]\tIllegal instruction\n");
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+	if (*ip == 0x0b0f) {
+		/* one of the ud2 instructions faulted */
+		printf("[OK]\tSYSCALL returned normally\n");
+	} else {
+		printf("[SKIP]\tIllegal instruction\n");
+	}
 	siglongjmp(jmpbuf, 1);
 }
 
@@ -73,7 +128,13 @@
 	if (sigaltstack(&stack, NULL) != 0)
 		err(1, "sigaltstack");
 
-	sethandler(SIGSEGV, sigsegv, SA_ONSTACK);
+	sethandler(SIGSEGV, sigsegv_or_sigbus, SA_ONSTACK);
+	/*
+	 * The actual exception can vary.  On Atom CPUs, we get #SS
+	 * instead of #PF when the vDSO fails to access the stack when
+	 * ESP is too close to 2^32, and #SS causes SIGBUS.
+	 */
+	sethandler(SIGBUS, sigsegv_or_sigbus, SA_ONSTACK);
 	sethandler(SIGILL, sigill, SA_ONSTACK);
 
 	/*
@@ -122,9 +183,48 @@
 			"movl $-1, %%ebp\n\t"
 			"movl $-1, %%esp\n\t"
 			"syscall\n\t"
-			"pushl $0"	/* make sure we segfault cleanly */
+			"ud2"		/* make sure we recover cleanly */
 			: : : "memory", "flags");
 	}
 
+	printf("[RUN]\tSYSENTER with TF and invalid state\n");
+	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sigtrap_consecutive_syscalls = 0;
+		set_eflags(get_eflags() | X86_EFLAGS_TF);
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"sysenter"
+			: : : "memory", "flags");
+	}
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+	printf("[RUN]\tSYSCALL with TF and invalid state\n");
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sigtrap_consecutive_syscalls = 0;
+		set_eflags(get_eflags() | X86_EFLAGS_TF);
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"syscall\n\t"
+			"ud2"		/* make sure we recover cleanly */
+			: : : "memory", "flags");
+	}
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
 	return 0;
 }
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index 43fcab3..02309a1 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * syscall_nt.c - checks syscalls with NT set
  * Copyright (c) 2014-2015 Andrew Lutomirski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * Some obscure user-space code requires the ability to make system calls
  * with FLAGS.NT set.  Make sure it works.
  */
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
new file mode 100644
index 0000000..d6b09cb
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * Copyright (c) 2018 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <syscall.h>
+
+static int nerrs;
+
+#define X32_BIT 0x40000000UL
+
+static void check_enosys(unsigned long nr, bool *ok)
+{
+	/* If this fails, a segfault is reasonably likely. */
+	fflush(stdout);
+
+	long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
+	if (ret == 0) {
+		printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
+		*ok = false;
+	} else if (errno != ENOSYS) {
+		printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
+		*ok = false;
+	}
+}
+
+static void test_x32_without_x32_bit(void)
+{
+	bool ok = true;
+
+	/*
+	 * Syscalls 512-547 are "x32" syscalls.  They are intended to be
+	 * called with the x32 (0x40000000) bit set.  Calling them without
+	 * the x32 bit set is nonsense and should not work.
+	 */
+	printf("[RUN]\tChecking syscalls 512-547\n");
+	for (int i = 512; i <= 547; i++)
+		check_enosys(i, &ok);
+
+	/*
+	 * Check that a handful of 64-bit-only syscalls are rejected if the x32
+	 * bit is set.
+	 */
+	printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
+	check_enosys(16 | X32_BIT, &ok);	/* ioctl */
+	check_enosys(19 | X32_BIT, &ok);	/* readv */
+	check_enosys(20 | X32_BIT, &ok);	/* writev */
+
+	/*
+	 * Check some syscalls with high bits set.
+	 */
+	printf("[RUN]\tChecking numbers above 2^32-1\n");
+	check_enosys((1UL << 32), &ok);
+	check_enosys(X32_BIT | (1UL << 32), &ok);
+
+	if (!ok)
+		nerrs++;
+	else
+		printf("[OK]\tThey all returned -ENOSYS\n");
+}
+
+int main()
+{
+	/*
+	 * Anyone diagnosing a failure will want to know whether the kernel
+	 * supports x32.  Tell them.
+	 */
+	printf("\tChecking for x32...");
+	fflush(stdout);
+	if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
+		printf(" supported\n");
+	} else if (errno == ENOSYS) {
+		printf(" not supported\n");
+	} else {
+		printf(" confused\n");
+	}
+
+	test_x32_without_x32_bit();
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index d85ec5b..84d74be 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * sigreturn.c - tests that x86 avoids Intel SYSRET pitfalls
  * Copyright (c) 2014-2016 Andrew Lutomirski
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #define _GNU_SOURCE
diff --git a/tools/testing/selftests/x86/sysret_ss_attrs.c b/tools/testing/selftests/x86/sysret_ss_attrs.c
index ce42d5a..5f3d4fc 100644
--- a/tools/testing/selftests/x86/sysret_ss_attrs.c
+++ b/tools/testing/selftests/x86/sysret_ss_attrs.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes
  * Copyright (c) 2015 Andrew Lutomirski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * On AMD CPUs, SYSRET can return with a valid SS descriptor with with
  * the hidden attributes set to an unusable state.  Make sure the kernel
  * doesn't let this happen.
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index 64f11c8..f0d876d 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 32-bit test to check vDSO mremap.
  *
  * Copyright (c) 2016 Dmitry Safonov
  * Suggested-by: Andrew Lutomirski
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Can be built statically:
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
index c9c3281..8965c31 100644
--- a/tools/testing/selftests/x86/test_syscall_vdso.c
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 32-bit syscall ABI conformance test.
  *
  * Copyright (c) 2015 Denys Vlasenko
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Can be built statically:
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 0b4f1cc..a4f4d4c 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -18,6 +18,7 @@
 #include <sched.h>
 #include <stdbool.h>
 #include <setjmp.h>
+#include <sys/uio.h>
 
 #ifdef __x86_64__
 # define VSYS(x) (x)
@@ -49,21 +50,21 @@
 }
 
 /* vsyscalls and vDSO */
-bool should_read_vsyscall = false;
+bool vsyscall_map_r = false, vsyscall_map_x = false;
 
 typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
-gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
 gtod_t vdso_gtod;
 
 typedef int (*vgettime_t)(clockid_t, struct timespec *);
 vgettime_t vdso_gettime;
 
 typedef long (*time_func_t)(time_t *t);
-time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
 time_func_t vdso_time;
 
 typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
 getcpu_t vdso_getcpu;
 
 static void init_vdso(void)
@@ -107,7 +108,7 @@
 	maps = fopen("/proc/self/maps", "r");
 	if (!maps) {
 		printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
-		should_read_vsyscall = true;
+		vsyscall_map_r = true;
 		return 0;
 	}
 
@@ -133,12 +134,8 @@
 		}
 
 		printf("\tvsyscall permissions are %c-%c\n", r, x);
-		should_read_vsyscall = (r == 'r');
-		if (x != 'x') {
-			vgtod = NULL;
-			vtime = NULL;
-			vgetcpu = NULL;
-		}
+		vsyscall_map_r = (r == 'r');
+		vsyscall_map_x = (x == 'x');
 
 		found = true;
 		break;
@@ -148,10 +145,8 @@
 
 	if (!found) {
 		printf("\tno vsyscall map in /proc/self/maps\n");
-		should_read_vsyscall = false;
-		vgtod = NULL;
-		vtime = NULL;
-		vgetcpu = NULL;
+		vsyscall_map_r = false;
+		vsyscall_map_x = false;
 	}
 
 	return nerrs;
@@ -183,9 +178,13 @@
 }
 
 static jmp_buf jmpbuf;
+static volatile unsigned long segv_err;
 
 static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
 {
+	ucontext_t *ctx = (ucontext_t *)ctx_void;
+
+	segv_err =  ctx->uc_mcontext.gregs[REG_ERR];
 	siglongjmp(jmpbuf, 1);
 }
 
@@ -238,7 +237,7 @@
 		err(1, "syscall gettimeofday");
 	if (vdso_gtod)
 		ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
-	if (vgtod)
+	if (vsyscall_map_x)
 		ret_vsys = vgtod(&tv_vsys, &tz_vsys);
 	if (sys_gtod(&tv_sys2, &tz_sys) != 0)
 		err(1, "syscall gettimeofday");
@@ -252,7 +251,7 @@
 		}
 	}
 
-	if (vgtod) {
+	if (vsyscall_map_x) {
 		if (ret_vsys == 0) {
 			nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
 		} else {
@@ -273,7 +272,7 @@
 	t_sys1 = sys_time(&t2_sys1);
 	if (vdso_time)
 		t_vdso = vdso_time(&t2_vdso);
-	if (vtime)
+	if (vsyscall_map_x)
 		t_vsys = vtime(&t2_vsys);
 	t_sys2 = sys_time(&t2_sys2);
 	if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
@@ -294,7 +293,7 @@
 		}
 	}
 
-	if (vtime) {
+	if (vsyscall_map_x) {
 		if (t_vsys < 0 || t_vsys != t2_vsys) {
 			printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
 			nerrs++;
@@ -330,7 +329,7 @@
 	ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
 	if (vdso_getcpu)
 		ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
-	if (vgetcpu)
+	if (vsyscall_map_x)
 		ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
 
 	if (ret_sys == 0) {
@@ -369,7 +368,7 @@
 		}
 	}
 
-	if (vgetcpu) {
+	if (vsyscall_map_x) {
 		if (ret_vsys) {
 			printf("[FAIL]\tvsyscall getcpu() failed\n");
 			nerrs++;
@@ -410,20 +409,88 @@
 		can_read = false;
 	}
 
-	if (can_read && !should_read_vsyscall) {
+	if (can_read && !vsyscall_map_r) {
 		printf("[FAIL]\tWe have read access, but we shouldn't\n");
 		return 1;
-	} else if (!can_read && should_read_vsyscall) {
+	} else if (!can_read && vsyscall_map_r) {
 		printf("[FAIL]\tWe don't have read access, but we should\n");
 		return 1;
+	} else if (can_read) {
+		printf("[OK]\tWe have read access\n");
 	} else {
-		printf("[OK]\tgot expected result\n");
+		printf("[OK]\tWe do not have read access: #PF(0x%lx)\n",
+		       segv_err);
 	}
 #endif
 
 	return 0;
 }
 
+static int test_vsys_x(void)
+{
+#ifdef __x86_64__
+	if (vsyscall_map_x) {
+		/* We already tested this adequately. */
+		return 0;
+	}
+
+	printf("[RUN]\tMake sure that vsyscalls really page fault\n");
+
+	bool can_exec;
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		vgtod(NULL, NULL);
+		can_exec = true;
+	} else {
+		can_exec = false;
+	}
+
+	if (can_exec) {
+		printf("[FAIL]\tExecuting the vsyscall did not page fault\n");
+		return 1;
+	} else if (segv_err & (1 << 4)) { /* INSTR */
+		printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n",
+		       segv_err);
+	} else {
+		printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n",
+		       segv_err);
+		return 1;
+	}
+#endif
+
+	return 0;
+}
+
+static int test_process_vm_readv(void)
+{
+#ifdef __x86_64__
+	char buf[4096];
+	struct iovec local, remote;
+	int ret;
+
+	printf("[RUN]\tprocess_vm_readv() from vsyscall page\n");
+
+	local.iov_base = buf;
+	local.iov_len = 4096;
+	remote.iov_base = (void *)0xffffffffff600000;
+	remote.iov_len = 4096;
+	ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
+	if (ret != 4096) {
+		printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
+		return 0;
+	}
+
+	if (vsyscall_map_r) {
+		if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
+			printf("[OK]\tIt worked and read correct data\n");
+		} else {
+			printf("[FAIL]\tIt worked but returned incorrect data\n");
+			return 1;
+		}
+	}
+#endif
+
+	return 0;
+}
 
 #ifdef __x86_64__
 #define X86_EFLAGS_TF (1UL << 8)
@@ -455,7 +522,7 @@
 	time_t tmp;
 	bool is_native;
 
-	if (!vtime)
+	if (!vsyscall_map_x)
 		return 0;
 
 	printf("[RUN]\tchecking that vsyscalls are emulated\n");
@@ -497,6 +564,9 @@
 
 	sethandler(SIGSEGV, sigsegv, 0);
 	nerrs += test_vsys_r();
+	nerrs += test_vsys_x();
+
+	nerrs += test_process_vm_readv();
 
 #ifdef __x86_64__
 	nerrs += test_emulation();
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
index ce8a995..1bb5d62 100644
--- a/tools/testing/selftests/x86/thunks.S
+++ b/tools/testing/selftests/x86/thunks.S
@@ -1,16 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * thunks.S - assembly helpers for mixed-bitness code
  * Copyright (c) 2015 Andrew Lutomirski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * These are little helpers that make it easier to switch bitness on
  * the fly.
  */
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
index 29b644b..a71d92d 100644
--- a/tools/testing/selftests/x86/thunks_32.S
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -1,16 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * thunks_32.S - assembly helpers for mixed-bitness code
  * Copyright (c) 2015 Denys Vlasenko
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * These are little helpers that make it easier to switch bitness on
  * the fly.
  */
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
index fabdf0f..aa1f58c 100644
--- a/tools/testing/selftests/x86/trivial_32bit_program.c
+++ b/tools/testing/selftests/x86/trivial_32bit_program.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Trivial program to check that we have a valid 32-bit build environment.
  * Copyright (c) 2015 Andy Lutomirski
- * GPL v2
  */
 
 #ifndef __i386__
diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c
index 05c6a41..39f4b84 100644
--- a/tools/testing/selftests/x86/trivial_64bit_program.c
+++ b/tools/testing/selftests/x86/trivial_64bit_program.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Trivial program to check that we have a valid 64-bit build environment.
  * Copyright (c) 2015 Andy Lutomirski
- * GPL v2
  */
 
 #ifndef __x86_64__
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 00a26a8..0075ccd 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * unwind_vdso.c - tests unwind info for AT_SYSINFO in the vDSO
  * Copyright (c) 2014-2015 Andrew Lutomirski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * This tests __kernel_vsyscall's unwind info.
  */
 
@@ -44,7 +36,6 @@
 #include <stdbool.h>
 #include <sys/ptrace.h>
 #include <sys/user.h>
-#include <sys/ucontext.h>
 #include <link.h>
 #include <sys/auxv.h>
 #include <dlfcn.h>
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
index cb03842..29a5c94 100644
--- a/tools/testing/selftests/x86/vdso_restorer.c
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * vdso_restorer.c - tests vDSO-based signal restore
  * Copyright (c) 2015 Andrew Lutomirski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * This makes sure that sa_restorer == NULL keeps working on 32-bit
  * configurations.  Modern glibc doesn't use it under any circumstances,
  * so it's easy to overlook breakage.