Update Linux to v5.10.109

Sourced from [1]

[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz

Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 644770c..0830e63 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -19,6 +19,7 @@
 	   copyloops		\
 	   dscr			\
 	   mm			\
+	   nx-gzip		\
 	   pmu			\
 	   signal		\
 	   primitives		\
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
index 6d4fd01..28bc6ca 100644
--- a/tools/testing/selftests/powerpc/alignment/.gitignore
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 copy_first_unaligned
 alignment_handler
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0725239..c25cf7c 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,17 @@
  * This selftest exercises the powerpc alignment fault handler.
  *
  * We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alternative path for cache-inhibited memory may be provided).
+ *
+ * One way to get cache-inhibited memory is to use the "mem" kernel parameter
+ * to limit the kernel to less memory than actually exists.  Addresses above
+ * the limit may still be accessed but will be treated as cache-inhibited. For
+ * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
+ * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
+ * To access this region /dev/mem is used. The kernel should be configured
+ * without CONFIG_STRICT_DEVMEM. In this case use:
+ *         ./alignment_handler /dev/mem 0xc0000000
  *
  * We initialise the source buffers, then use whichever set of load/store
  * instructions is under test to copy bytes from the source buffers to the
@@ -45,14 +55,16 @@
 #include <setjmp.h>
 #include <signal.h>
 
-#include <asm/cputable.h>
-
 #include "utils.h"
+#include "instructions.h"
 
 int bufsize;
 int debug;
 int testing;
 volatile int gotsig;
+bool prefixes_enabled;
+char *cipath = "/dev/fb0";
+long cioffset;
 
 void sighandler(int sig, siginfo_t *info, void *ctx)
 {
@@ -64,7 +76,12 @@
 	}
 	gotsig = sig;
 #ifdef __powerpc64__
-	ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+	if (prefixes_enabled) {
+		u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP];
+		ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4);
+	} else {
+		ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+	}
 #else
 	ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
 #endif
@@ -84,6 +101,17 @@
 	}							\
 	rc |= do_test(#name, test_##name)
 
+#define TESTP(name, ld_op, st_op, ld_reg, st_reg)		\
+	void test_##name(char *s, char *d)			\
+	{							\
+		asm volatile(					\
+			ld_op(ld_reg, %0, 0, 0)			\
+			st_op(st_reg, %1, 0, 0)			\
+			:: "r"(s), "r"(d), "r"(0)		\
+			: "memory", "vs0", "vs32", "r31");	\
+	}							\
+	rc |= do_test(#name, test_##name)
+
 #define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
 #define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
 #define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
@@ -103,6 +131,17 @@
 #define LOAD_FLOAT_XFORM_TEST(op)  TEST(op, op, stfdx, XFORM, 0, 0)
 #define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
 
+#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0)
+#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0)
+
+#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32)
+#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0)
 
 /* FIXME: Unimplemented tests: */
 // STORE_DFORM_TEST(stq)   /* FIXME: need two registers for quad */
@@ -195,17 +234,18 @@
 
 	printf("\tDoing %s:\t", test_name);
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0) {
 		printf("\n");
-		perror("Can't open /dev/fb0 now?");
+		perror("Can't open ci file now?");
 		return 1;
 	}
 
-	ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, 0x0);
-	ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, bufsize);
+	ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset);
+	ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset + bufsize);
+
 	if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
 		printf("\n");
 		perror("mmap failed");
@@ -226,8 +266,12 @@
 	}
 
 	rc = 0;
-	/* offset = 0 no alignment fault, so skip */
-	for (offset = 1; offset < 16; offset++) {
+	/*
+	 * offset = 0 is aligned but tests the workaround for the P9N
+	 * DD2.1 vector CI load issue (see 5080332c2c89 "powerpc/64s:
+	 * Add workaround for P9 vector CI load issue")
+	 */
+	for (offset = 0; offset < 16; offset++) {
 		width = 16; /* vsx == 16 bytes */
 		r = 0;
 
@@ -270,11 +314,11 @@
 	return rc;
 }
 
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
 {
 	int fd;
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0)
 		return false;
 
@@ -286,7 +330,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("VSX: 2.06B\n");
@@ -304,7 +348,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
 
 	printf("VSX: 2.07B\n");
@@ -320,7 +364,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
 	printf("VSX: 3.00B\n");
@@ -348,11 +392,30 @@
 	return rc;
 }
 
+int test_alignment_handler_vsx_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("VSX: PREFIX\n");
+	LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1);
+	return rc;
+}
+
 int test_alignment_handler_integer(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Integer\n");
 	LOAD_DFORM_TEST(lbz);
@@ -411,7 +474,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Integer: 2.06\n");
@@ -422,11 +485,32 @@
 	return rc;
 }
 
+int test_alignment_handler_integer_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("Integer: PREFIX\n");
+	LOAD_MLS_PREFIX_TEST(PLBZ);
+	LOAD_MLS_PREFIX_TEST(PLHZ);
+	LOAD_MLS_PREFIX_TEST(PLHA);
+	LOAD_MLS_PREFIX_TEST(PLWZ);
+	LOAD_8LS_PREFIX_TEST(PLWA);
+	LOAD_8LS_PREFIX_TEST(PLD);
+	STORE_MLS_PREFIX_TEST(PSTB);
+	STORE_MLS_PREFIX_TEST(PSTH);
+	STORE_MLS_PREFIX_TEST(PSTW);
+	STORE_8LS_PREFIX_TEST(PSTD);
+	return rc;
+}
+
 int test_alignment_handler_vmx(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
 
 	printf("VMX\n");
@@ -454,7 +538,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Floating point\n");
 	LOAD_FLOAT_DFORM_TEST(lfd);
@@ -482,7 +566,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
 
 	printf("Floating point: 2.05\n");
@@ -500,7 +584,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Floating point: 2.06\n");
@@ -510,13 +594,32 @@
 	return rc;
 }
 
+
+int test_alignment_handler_fp_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("Floating point: PREFIX\n");
+	LOAD_FLOAT_DFORM_TEST(lfs);
+	LOAD_FLOAT_MLS_PREFIX_TEST(PLFS);
+	LOAD_FLOAT_MLS_PREFIX_TEST(PLFD);
+	STORE_FLOAT_MLS_PREFIX_TEST(PSTFS);
+	STORE_FLOAT_MLS_PREFIX_TEST(PSTFD);
+	return rc;
+}
+
 void usage(char *prog)
 {
-	printf("Usage: %s [options]\n", prog);
+	printf("Usage: %s [options] [path [offset]]\n", prog);
 	printf("  -d	Enable debug error output\n");
 	printf("\n");
-	printf("This test requires a POWER8 or POWER9 CPU and a usable ");
-	printf("framebuffer at /dev/fb0.\n");
+	printf("This test requires a POWER8, POWER9 or POWER10 CPU ");
+	printf("and either a usable framebuffer at /dev/fb0 or ");
+	printf("the path to usable cache inhibited memory and optional ");
+	printf("offset to be provided\n");
 }
 
 int main(int argc, char *argv[])
@@ -536,6 +639,13 @@
 			exit(1);
 		}
 	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc > 0)
+		cipath = argv[0];
+	if (argc > 1)
+		cioffset = strtol(argv[1], 0, 0x10);
 
 	bufsize = getpagesize();
 
@@ -549,16 +659,22 @@
 		exit(1);
 	}
 
+	prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1);
+
 	rc |= test_harness(test_alignment_handler_vsx_206,
 			   "test_alignment_handler_vsx_206");
 	rc |= test_harness(test_alignment_handler_vsx_207,
 			   "test_alignment_handler_vsx_207");
 	rc |= test_harness(test_alignment_handler_vsx_300,
 			   "test_alignment_handler_vsx_300");
+	rc |= test_harness(test_alignment_handler_vsx_prefix,
+			   "test_alignment_handler_vsx_prefix");
 	rc |= test_harness(test_alignment_handler_integer,
 			   "test_alignment_handler_integer");
 	rc |= test_harness(test_alignment_handler_integer_206,
 			   "test_alignment_handler_integer_206");
+	rc |= test_harness(test_alignment_handler_integer_prefix,
+			   "test_alignment_handler_integer_prefix");
 	rc |= test_harness(test_alignment_handler_vmx,
 			   "test_alignment_handler_vmx");
 	rc |= test_harness(test_alignment_handler_fp,
@@ -567,5 +683,7 @@
 			   "test_alignment_handler_fp_205");
 	rc |= test_harness(test_alignment_handler_fp_206,
 			   "test_alignment_handler_fp_206");
+	rc |= test_harness(test_alignment_handler_fp_prefix,
+			   "test_alignment_handler_fp_prefix");
 	return rc;
 }
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
index 9161679..c9ce139 100644
--- a/tools/testing/selftests/powerpc/benchmarks/.gitignore
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 gettimeofday
 context_switch
 fork
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index d40300a..a32a6ab 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -2,6 +2,8 @@
 TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
 TEST_GEN_FILES := exec_target
 
+TEST_FILES := settings
+
 CFLAGS += -O2
 
 top_srcdir = ../../../../..
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index d50cc05..96554e2 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -481,6 +481,12 @@
 	else
 		printf("futex");
 
+	if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC))
+		touch_altivec = 0;
+
+	if (!have_hwcap(PPC_FEATURE_HAS_VSX))
+		touch_vector = 0;
+
 	printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
 	       cpu1, cpu2, touch_fp ?  "yes" : "no", touch_altivec ? "yes" : "no",
 	       touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
diff --git a/tools/testing/selftests/powerpc/benchmarks/settings b/tools/testing/selftests/powerpc/benchmarks/settings
new file mode 100644
index 0000000..e7b9417
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore
index ec18484..b385eee 100644
--- a/tools/testing/selftests/powerpc/cache_shape/.gitignore
+++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 cache_shape
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index 12ef5b0..994b11a 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 copyuser_64_t0
 copyuser_64_t1
 copyuser_64_t2
@@ -11,4 +12,4 @@
 copyuser_64_exc_t0
 copyuser_64_exc_t1
 copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983..3095b1f 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@
 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
 		copyuser_p7_t0 copyuser_p7_t1 \
 		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
-		memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+		memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
 		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
 
 EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@
 		-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
 		-o $@ $^
 
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
 	$(CC) $(CPPFLAGS) $(CFLAGS) \
-		-D COPY_LOOP=test_memcpy_mcsafe \
+		-D COPY_LOOP=test_copy_mc_generic \
 		-o $@ $^
 
 $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 0000000..dcbe06d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3..0000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
index b585c6c..1d08b15 100644
--- a/tools/testing/selftests/powerpc/dscr/.gitignore
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 dscr_default_test
 dscr_explicit_test
 dscr_inherit_exec_test
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 5df4763..845db62 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -3,9 +3,11 @@
 	      dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test	\
 	      dscr_sysfs_thread_test
 
+TEST_FILES := settings
+
 top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
 
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index 288a4e2..e76611e 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -63,6 +63,8 @@
 	unsigned long i, *status[THREADS];
 	unsigned long orig_dscr_default;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	orig_dscr_default = get_default_dscr();
 
 	/* Initial DSCR default */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
index aefcd8d..32fcf2b 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -21,6 +21,8 @@
 {
 	unsigned long i, dscr = 0;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	srand(getpid());
 	set_dscr(dscr);
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 7c1cb46..c6a81b2 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -44,6 +44,8 @@
 	unsigned long i, dscr = 0;
 	pid_t pid;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	for (i = 0; i < COUNT; i++) {
 		dscr++;
 		if (dscr > DSCR_MAX)
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
index 04297a6..f9dfd3d 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -22,6 +22,8 @@
 	unsigned long i, dscr = 0;
 	pid_t pid;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	srand(getpid());
 	set_dscr(dscr);
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 02f6b4e..fbbdffd 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -77,6 +77,8 @@
 	unsigned long orig_dscr_default;
 	int i, j;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	orig_dscr_default = get_default_dscr();
 	for (i = 0; i < COUNT; i++) {
 		for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
index 37be2c2..191ed12 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -56,6 +56,8 @@
 	unsigned long orig_dscr_default;
 	int i, j;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	orig_dscr_default = get_default_dscr();
 	for (i = 0; i < COUNT; i++) {
 		for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
index eaf785d..e090724 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -36,6 +36,8 @@
 {
 	int i;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	check_dscr("");
 
 	for (i = 0; i < COUNT; i++) {
diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings
new file mode 100644
index 0000000..e7b9417
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index 7c2cb04..64779f0 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -43,6 +43,11 @@
 		continue;
 	fi
 
+	if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
+		echo "$dev, Skipped: ahci doesn't support recovery"
+		continue
+	fi
+
 	# Don't inject errosr into an already-frozen PE. This happens with
 	# PEs that contain multiple PCI devices (e.g. multi-function cards)
 	# and injecting new errors during the recovery process will probably
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index f36061e..4efa631 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -66,4 +66,81 @@
 #define PPC_INST_PASTE                 __PASTE(0, 0, 0, 0)
 #define PPC_INST_PASTE_LAST            __PASTE(0, 0, 1, 1)
 
+/* This defines the prefixed load/store instructions */
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)	__VA_ARGS__
+#else
+#  define __stringify_in_c(...)	#__VA_ARGS__
+#  define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+#endif
+
+#define __PPC_RA(a)	(((a) & 0x1f) << 16)
+#define __PPC_RS(s)	(((s) & 0x1f) << 21)
+#define __PPC_RT(t)	__PPC_RS(t)
+#define __PPC_PREFIX_R(r)	(((r) & 0x1) << 20)
+
+#define PPC_PREFIX_MLS			0x06000000
+#define PPC_PREFIX_8LS			0x04000000
+
+#define PPC_INST_LBZ			0x88000000
+#define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LHA			0xa8000000
+#define PPC_INST_LWZ			0x80000000
+#define PPC_INST_STB			0x98000000
+#define PPC_INST_STH			0xb0000000
+#define PPC_INST_STW			0x90000000
+#define PPC_INST_STD			0xf8000000
+#define PPC_INST_LFS			0xc0000000
+#define PPC_INST_LFD			0xc8000000
+#define PPC_INST_STFS			0xd0000000
+#define PPC_INST_STFD			0xd8000000
+
+#define PREFIX_MLS(instr, t, a, r, d)	stringify_in_c(.balign 64, , 4;)		\
+					stringify_in_c(.long PPC_PREFIX_MLS |		\
+						       __PPC_PREFIX_R(r) |		\
+						       (((d) >> 16) & 0x3ffff);)	\
+					stringify_in_c(.long (instr)  |			\
+						       __PPC_RT(t) |			\
+						       __PPC_RA(a) |			\
+						       ((d) & 0xffff);\n)
+
+#define PREFIX_8LS(instr, t, a, r, d)	stringify_in_c(.balign 64, , 4;)		\
+					stringify_in_c(.long PPC_PREFIX_8LS |		\
+						       __PPC_PREFIX_R(r) |		\
+						       (((d) >> 16) & 0x3ffff);)	\
+					stringify_in_c(.long (instr)  |			\
+						       __PPC_RT(t) |			\
+						       __PPC_RA(a) |			\
+						       ((d) & 0xffff);\n)
+
+/* Prefixed Integer Load/Store instructions */
+#define PLBZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LBZ, t, a, r, d)
+#define PLHZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LHZ, t, a, r, d)
+#define PLHA(t, a, r, d)		PREFIX_MLS(PPC_INST_LHA, t, a, r, d)
+#define PLWZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LWZ, t, a, r, d)
+#define PLWA(t, a, r, d)		PREFIX_8LS(0xa4000000, t, a, r, d)
+#define PLD(t, a, r, d)			PREFIX_8LS(0xe4000000, t, a, r, d)
+#define PLQ(t, a, r, d)			PREFIX_8LS(0xe0000000, t, a, r, d)
+#define PSTB(s, a, r, d)		PREFIX_MLS(PPC_INST_STB, s, a, r, d)
+#define PSTH(s, a, r, d)		PREFIX_MLS(PPC_INST_STH, s, a, r, d)
+#define PSTW(s, a, r, d)		PREFIX_MLS(PPC_INST_STW, s, a, r, d)
+#define PSTD(s, a, r, d)		PREFIX_8LS(0xf4000000, s, a, r, d)
+#define PSTQ(s, a, r, d)		PREFIX_8LS(0xf0000000, s, a, r, d)
+
+/* Prefixed Floating-Point Load/Store Instructions */
+#define PLFS(frt, a, r, d)		PREFIX_MLS(PPC_INST_LFS, frt, a, r, d)
+#define PLFD(frt, a, r, d)		PREFIX_MLS(PPC_INST_LFD, frt, a, r, d)
+#define PSTFS(frs, a, r, d)		PREFIX_MLS(PPC_INST_STFS, frs, a, r, d)
+#define PSTFD(frs, a, r, d)		PREFIX_MLS(PPC_INST_STFD, frs, a, r, d)
+
+/* Prefixed VSX Load/Store Instructions */
+#define PLXSD(vrt, a, r, d)		PREFIX_8LS(0xa8000000, vrt, a, r, d)
+#define PLXSSP(vrt, a, r, d)		PREFIX_8LS(0xac000000, vrt, a, r, d)
+#define PLXV0(s, a, r, d)		PREFIX_8LS(0xc8000000, s, a, r, d)
+#define PLXV1(s, a, r, d)		PREFIX_8LS(0xcc000000, s, a, r, d)
+#define PSTXSD(vrs, a, r, d)		PREFIX_8LS(0xb8000000, vrs, a, r, d)
+#define PSTXSSP(vrs, a, r, d)		PREFIX_8LS(0xbc000000, vrs, a, r, d)
+#define PSTXV0(s, a, r, d)		PREFIX_8LS(0xd8000000, s, a, r, d)
+#define PSTXV1(s, a, r, d)		PREFIX_8LS(0xdc000000, s, a, r, d)
+
 #endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
new file mode 100644
index 0000000..3312cb1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PKEYS_H
+#define _SELFTESTS_POWERPC_PKEYS_H
+
+#include <sys/mman.h>
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS	0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE	0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+
+/* Older versions of libc do not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR	4
+#endif
+
+#define SI_PKEY_OFFSET	0x20
+
+#define __NR_pkey_mprotect	386
+#define __NR_pkey_alloc		384
+#define __NR_pkey_free		385
+
+#define PKEY_BITS_PER_PKEY	2
+#define NR_PKEYS		32
+#define PKEY_BITS_MASK		((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+static inline unsigned long pkeyreg_get(void)	/* static: no extern copy needed */
+{
+	return mfspr(SPRN_AMR);	/* current pkey access bits live in the AMR */
+}
+
+static inline void pkeyreg_set(unsigned long amr)	/* static: no extern copy needed */
+{
+	set_amr(amr);	/* mtspr to the AMR, bracketed by isync (see reg.h) */
+}
+
+void pkey_set_rights(int pkey, unsigned long rights)
+{
+	unsigned long amr, shift;
+
+	shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+	amr = pkeyreg_get();
+	amr &= ~(PKEY_BITS_MASK << shift);
+	amr |= (rights & PKEY_BITS_MASK) << shift;
+	pkeyreg_set(amr);
+}
+
+int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+	return syscall(__NR_pkey_mprotect, addr, len, prot, pkey);
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+	return syscall(__NR_pkey_alloc, flags, rights);
+}
+
+int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+int pkeys_unsupported(void)
+{
+	bool hash_mmu = false;
+	int pkey;
+
+	/* Protection keys are currently supported on Hash MMU only */
+	FAIL_IF(using_hash_mmu(&hash_mmu));
+	SKIP_IF(!hash_mmu);
+
+	/* Check if the system call is supported */
+	pkey = sys_pkey_alloc(0, 0);
+	SKIP_IF(pkey < 0);
+	sys_pkey_free(pkey);
+
+	return 0;
+}
+
+int siginfo_pkey(siginfo_t *si)
+{
+	/*
+	 * Return the pkey recorded in *si. Older versions of libc have no
+	 * si_pkey member, so fall back to reading an int at SI_PKEY_OFFSET.
+	 */
+#ifdef si_pkey
+	return si->si_pkey;
+#else
+	return *((int *)(((char *) si) + SI_PKEY_OFFSET));
+#endif
+}
+
+#define pkey_rights(r) ({						\
+	/* Static so the string outlives the expression; every char is */	\
+	/* rewritten on each use so flags from earlier calls do not stick. */	\
+	static char buf[4];						\
+	unsigned long pkr_rights_ = (r);				\
+	unsigned int amr_bits = pkr_rights_ & PKEY_BITS_MASK;		\
+	buf[0] = (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) ? '-' : 'r'; \
+	buf[1] = (amr_bits & PKEY_DISABLE_WRITE) ? '-' : 'w';		\
+	buf[2] = (pkr_rights_ & PKEY_DISABLE_EXECUTE) ? '-' : 'x';	\
+	buf[3] = '\0';							\
+	buf;								\
+})
+
+unsigned long next_pkey_rights(unsigned long rights)
+{
+	if (rights == PKEY_DISABLE_ACCESS)
+		return PKEY_DISABLE_EXECUTE;
+	else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE))
+		return 0;
+
+	if ((rights & PKEY_BITS_MASK) == 0)
+		rights |= PKEY_DISABLE_WRITE;
+	else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE)
+		rights |= PKEY_DISABLE_ACCESS;
+
+	return rights;
+}
+
+#endif /* _SELFTESTS_POWERPC_PKEYS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 022c507..c0f2742 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -57,6 +57,12 @@
 #define SPRN_PPR       896	/* Program Priority Register */
 #define SPRN_AMR       13	/* Authority Mask Register - problem state */
 
+#define set_amr(v)	asm volatile("isync;" \
+				     "mtspr " __stringify(SPRN_AMR) ",%0;" \
+				     "isync" : \
+				    : "r" ((unsigned long)(v)) \
+				    : "memory")
+
 /* TEXASR register bits */
 #define TEXASR_FC	0xFE00000000000000
 #define TEXASR_FP	0x0100000000000000
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 0e2b2e6..b7d188f 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -12,6 +12,7 @@
 #include <stdbool.h>
 #include <linux/auxvec.h>
 #include <linux/perf_event.h>
+#include <asm/cputable.h>
 #include "reg.h"
 
 /* Avoid headaches with PRI?64 - just use %ll? always */
@@ -34,13 +35,28 @@
 
 int read_debugfs_file(char *debugfs_file, int *result);
 int write_debugfs_file(char *debugfs_file, int result);
-void set_dscr(unsigned long val);
+int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
 int perf_event_open_counter(unsigned int type,
 			    unsigned long config, int group_fd);
 int perf_event_enable(int fd);
 int perf_event_disable(int fd);
 int perf_event_reset(int fd);
 
+struct perf_event_read {
+	__u64 nr;
+	__u64 l1d_misses;
+};
+
+#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+#endif
+
 static inline bool have_hwcap(unsigned long ftr)
 {
 	return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -59,6 +75,7 @@
 #endif
 
 bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
 
 /* Yes, this is evil */
 #define FAIL_IF(x)						\
@@ -70,6 +87,15 @@
 	}							\
 } while (0)
 
+#define FAIL_IF_EXIT(x)						\
+do {								\
+	if ((x)) {						\
+		fprintf(stderr,					\
+		"[FAIL] Test FAILED on line %d\n", __LINE__);	\
+		_exit(1);					\
+	}							\
+} while (0)
+
 /* The test harness uses this, yes it's gross */
 #define MAGIC_SKIP_RETURN_VALUE	99
 
@@ -95,11 +121,20 @@
 #define _str(s) #s
 #define str(s) _str(s)
 
+#define sigsafe_err(msg)	({ \
+		ssize_t nbytes __attribute__((unused)); \
+		nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
 /* POWER9 feature */
 #ifndef PPC_FEATURE2_ARCH_3_00
 #define PPC_FEATURE2_ARCH_3_00 0x00800000
 #endif
 
+/* POWER10 feature */
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
 #if defined(__powerpc64__)
 #define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
 #define UCONTEXT_MSR(UC)	(UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index 50ded63..d0c23b2 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 fpu_syscall
 vmx_syscall
 fpu_preempt
@@ -5,3 +6,4 @@
 fpu_signal
 vmx_signal
 vsx_preempt
+fpu_denormal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 11a10d7..fcc91c2 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
 
 top_srcdir = ../../../../..
 include ../../lib.mk
@@ -11,9 +11,9 @@
 $(OUTPUT)/fpu_preempt: fpu_asm.S
 $(OUTPUT)/fpu_signal:  fpu_asm.S
 
-$(OUTPUT)/vmx_syscall: vmx_asm.S
-$(OUTPUT)/vmx_preempt: vmx_asm.S
-$(OUTPUT)/vmx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
 
 $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
-$(OUTPUT)/vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c
new file mode 100644
index 0000000..5f96682
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately
+ * if the denormal handler is not configured or working properly, this can cause a bad
+ * crash in kernel mode when the kernel tries to save FP registers when the process
+ * exits.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+/* Widen a denormal float to double and check the renormalised bit pattern. */
+static int test_denormal_fpu(void)
+{
+	unsigned int m32;
+	unsigned long long m64; /* 64 bits even on 32-bit builds, to hold a double */
+	volatile float f;
+	volatile double d;
+
+	/* try to induce lfs <denormal> ; stfd */
+	m32 = 0x00715fcf; /* random denormal */
+	memcpy((float *)&f, &m32, sizeof(f));
+	d = f;
+	memcpy(&m64, (double *)&d, sizeof(d));
+
+	FAIL_IF(m64 != 0x380c57f3c0000000ULL); /* renormalised value */
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_denormal_fpu, "fpu_denormal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
index 2e059f1..6761d6c 100644
--- a/tools/testing/selftests/powerpc/math/vmx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -57,6 +57,9 @@
 	int i, rc, threads;
 	pthread_t *tids;
 
+	// vcmpequd used in vmx_asm.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
index 785a48e..b340a5c 100644
--- a/tools/testing/selftests/powerpc/math/vmx_signal.c
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -96,6 +96,9 @@
 	void *rc_p;
 	pthread_t *tids;
 
+	// vcmpequd used in vmx_asm.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
index 9ee293c..03c78df 100644
--- a/tools/testing/selftests/powerpc/math/vmx_syscall.c
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -49,9 +49,14 @@
 	 * Setup an environment with much context switching
 	 */
 	pid_t pid2;
-	pid_t pid = fork();
+	pid_t pid;
 	int ret;
 	int child_ret;
+
+	// vcmpequd used in vmx_asm.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+	pid = fork();
 	FAIL_IF(pid == -1);
 
 	pid2 = fork();
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
index 63de9c6..d1601bb 100644
--- a/tools/testing/selftests/powerpc/math/vsx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -92,6 +92,8 @@
 	int i, rc, threads;
 	pthread_t *tids;
 
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index d021172..aac4a59 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 hugetlb_vs_thp_test
 subpage_prot
 tempfile
@@ -5,4 +6,9 @@
 segv_errors
 wild_bctr
 large_vm_fork_separation
+bad_accesses
 tlbie_test
+pkey_exec_prot
+pkey_siginfo
+stack_expansion_ldst
+stack_expansion_signal
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index ed15658..defe488 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,21 +3,32 @@
 	$(MAKE) -C ../
 
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
-		  large_vm_fork_separation
+		  large_vm_fork_separation bad_accesses pkey_exec_prot \
+		  pkey_siginfo stack_expansion_signal stack_expansion_ldst
+
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 $(OUTPUT)/prot_sao: ../utils.c
 
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
+$(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
+
+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
 
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1
 
 $(OUTPUT)/tlbie_test: LDLIBS += -lpthread
+$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
new file mode 100644
index 0000000..fd747b2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019, Michael Ellerman, IBM Corp.
+//
+// Test that out-of-bounds reads/writes behave as expected.
+
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+// Old distros (Ubuntu 16.04 at least) don't define this
+#ifndef SEGV_BNDERR
+#define SEGV_BNDERR	3
+#endif
+
+// 64-bit kernel is always here
+#define PAGE_OFFSET	(0xcul << 60)
+
+static unsigned long kernel_virt_end;	// exclusive end of the kernel virtual range
+// Written by segv_handler(), read back by bad_access() after siglongjmp().
+static volatile int fault_code;
+static volatile unsigned long fault_addr;
+static sigjmp_buf setjmp_env;	// POSIX: sigsetjmp()/siglongjmp() take a sigjmp_buf
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+	fault_code = info->si_code;
+	fault_addr = (unsigned long)info->si_addr;
+	siglongjmp(setjmp_env, 1);
+}
+
+// Load from (or, if @write, store to) @p, which is expected to fault with SIGSEGV.
+// Returns 1 if the access did not fault, 0 once the fault checks below pass.
+int bad_access(char *p, bool write)
+{
+	char x = 0;	// printed below even on the store path, so must be initialised
+
+	fault_code = 0;
+	fault_addr = 0;
+	if (sigsetjmp(setjmp_env, 1) == 0) {
+		if (write)
+			*p = 1;
+		else
+			x = *p;
+
+		printf("Bad - no SEGV! (%c)\n", x);
+		return 1;
+	}
+
+	// If we see MAPERR that means we took a page fault rather than an SLB
+	// miss. We only expect to take page faults for addresses within the
+	// valid kernel range.
+	FAIL_IF(fault_code == SEGV_MAPERR && \
+		(fault_addr < PAGE_OFFSET || fault_addr >= kernel_virt_end));
+	FAIL_IF(fault_code != SEGV_MAPERR && fault_code != SEGV_BNDERR);
+
+	return 0;
+}
+
+static int test(void)
+{
+	unsigned long i, j, addr, region_shift, page_shift, page_size;
+	struct sigaction sig;
+	bool hash_mmu;
+
+	sig = (struct sigaction) {
+		.sa_sigaction = segv_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+
+	FAIL_IF(sigaction(SIGSEGV, &sig, NULL) != 0);
+
+	FAIL_IF(using_hash_mmu(&hash_mmu));
+
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size == (64 * 1024))
+		page_shift = 16;
+	else
+		page_shift = 12;
+
+	if (page_size == (64 * 1024) || !hash_mmu) {
+		region_shift = 52;
+
+		// We have 7 512T regions (4 kernel linear, vmalloc, io, vmemmap)
+		kernel_virt_end = PAGE_OFFSET + (7 * (512ul << 40));
+	} else if (page_size == (4 * 1024) && hash_mmu) {
+		region_shift = 46;
+
+		// We have 7 64T regions (4 kernel linear, vmalloc, io, vmemmap)
+		kernel_virt_end = PAGE_OFFSET + (7 * (64ul << 40));
+	} else
+		FAIL_IF(true);
+
+	printf("Using %s MMU, PAGE_SIZE = %dKB start address 0x%016lx\n",
+	       hash_mmu ? "hash" : "radix",
+	       (1 << page_shift) >> 10,
+	       1ul << region_shift);
+
+	// This generates access patterns like:
+	//   0x0010000000000000
+	//   0x0010000000010000
+	//   0x0010000000020000
+	//   ...
+	//   0x0014000000000000
+	//   0x0018000000000000
+	//   0x0020000000000000
+	//   0x0020000000010000
+	//   0x0020000000020000
+	//   ...
+	//   0xf400000000000000
+	//   0xf800000000000000
+
+	for (i = 1; i <= ((0xful << 60) >> region_shift); i++) {
+		for (j = page_shift - 1; j < 60; j++) {
+			unsigned long base, delta;
+
+			base  = i << region_shift;
+			delta = 1ul << j;
+
+			if (delta >= base)
+				break;
+
+			addr = (base | delta) & ~((1 << page_shift) - 1);
+
+			FAIL_IF(bad_access((char *)addr, false));
+			FAIL_IF(bad_access((char *)addr, true));
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness_set_timeout(300);
+	return test_harness(test, "bad_accesses");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 0000000..0af4f02
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP	0x60000000
+#define PPC_INST_TRAP	0x7fe00008
+#define PPC_INST_BLR	0x4e800020
+
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	/* Report a SIGTRAP raised anywhere other than the expected address */
+	if (sinfo->si_addr != (void *) fault_addr)
+		sigsafe_err("got a fault for an unexpected address\n");
+
+	_exit(1);	/* unconditional: any SIGTRAP ends the process with status 1 */
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	int signal_pkey;
+
+	signal_pkey = siginfo_pkey(sinfo);
+	fault_code = sinfo->si_code;
+
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* Check if too many faults have occurred for a single test case */
+	if (!remaining_faults) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+
+	/* Restore permissions in order to continue */
+	switch (fault_code) {
+	case SEGV_ACCERR:
+		if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) {
+			sigsafe_err("failed to set access permissions\n");
+			_exit(1);
+		}
+		break;
+	case SEGV_PKUERR:
+		if (signal_pkey != fault_pkey) {
+			sigsafe_err("got a fault for an unexpected pkey\n");
+			_exit(1);
+		}
+
+		switch (fault_type) {
+		case PKEY_DISABLE_ACCESS:
+			pkey_set_rights(fault_pkey, 0);
+			break;
+		case PKEY_DISABLE_EXECUTE:
+			/*
+			 * Reassociate the exec-only pkey with the region
+			 * to be able to continue. Unlike AMR, we cannot
+			 * set IAMR directly from userspace to restore the
+			 * permissions.
+			 */
+			if (mprotect(insns, pgsize, PROT_EXEC)) {
+				sigsafe_err("failed to set execute permissions\n");
+				_exit(1);
+			}
+			break;
+		default:
+			sigsafe_err("got a fault with an unexpected type\n");
+			_exit(1);
+		}
+		break;
+	default:
+		sigsafe_err("got a fault with an unexpected code\n");
+		_exit(1);
+	}
+
+	remaining_faults--;
+}
+
+/* Check read, write and execute faults on a page for varying pkey rights. */
+static int test(void)
+{
+	struct sigaction segv_act, trap_act;
+	unsigned long rights;
+	int pkey, ret, i;
+
+	ret = pkeys_unsupported();
+	if (ret)
+		return ret;
+
+	/* Setup SIGSEGV handler */
+	segv_act.sa_handler = 0;
+	segv_act.sa_sigaction = segv_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+	segv_act.sa_flags = SA_SIGINFO;
+	segv_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+	/* Setup SIGTRAP handler */
+	trap_act.sa_handler = 0;
+	trap_act.sa_sigaction = trap_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+	trap_act.sa_flags = SA_SIGINFO;
+	trap_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+	/* Setup executable region */
+	pgsize = getpagesize();
+	numinsns = pgsize / sizeof(unsigned int);
+	insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+				      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(insns == MAP_FAILED);
+
+	/* Write the instruction words */
+	for (i = 1; i < numinsns - 1; i++)
+		insns[i] = PPC_INST_NOP;
+
+	/*
+	 * Set the first instruction as an unconditional trap. If
+	 * the last write to this address succeeds, this should
+	 * get overwritten by a no-op.
+	 */
+	insns[0] = PPC_INST_TRAP;
+
+	/*
+	 * Later, to jump to the executable region, we use a branch
+	 * and link instruction (bctrl) which sets the return address
+	 * automatically in LR. Use that to return back.
+	 */
+	insns[numinsns - 1] = PPC_INST_BLR;
+
+	/* Allocate a pkey that restricts execution */
+	rights = PKEY_DISABLE_EXECUTE;
+	pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF(pkey < 0);
+
+	/*
+	 * Pick the first instruction's address from the executable
+	 * region.
+	 */
+	fault_addr = insns;
+
+	/* The following two cases will avoid SEGV_PKUERR */
+	fault_type = -1;
+	fault_pkey = -1;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should not generate a fault as having PROT_EXEC
+	 * implies PROT_READ on GNU systems. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 0;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should generate an access fault as having just
+	 * PROT_EXEC also restricts writes. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_TRAP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* The following three cases will generate SEGV_PKUERR */
+	rights |= PKEY_DISABLE_ACCESS;
+	fault_type = PKEY_DISABLE_ACCESS;
+	fault_pkey = pkey;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate a pkey fault based on AMR bits only
+	 * as having PROT_EXEC implicitly allows reads.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate two faults. First, a pkey fault
+	 * based on AMR bits and then an access fault since
+	 * PROT_EXEC does not allow writes.
+	 */
+	remaining_faults = 2;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_NOP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* Free the current pkey */
+	sys_pkey_free(pkey);
+
+	rights = 0;
+	do {
+		/*
+		 * Allocate pkeys with all valid combinations of read,
+		 * write and execute restrictions.
+		 */
+		pkey = sys_pkey_alloc(0, rights);
+		FAIL_IF(pkey < 0);
+
+		/*
+		 * Jump to the executable region (mtctr/bctrl clobber CTR and
+		 * LR). AMR bits may be set but should not affect execution.
+		 *
+		 * This should generate pkey faults based on IAMR bits which
+		 * may be set to restrict execution.
+		 *
+		 * The first iteration also checks if the overwrite of the
+		 * first instruction word from a trap to a no-op succeeded.
+		 */
+		fault_pkey = pkey;
+		fault_type = -1;
+		remaining_faults = 0;
+		if (rights & PKEY_DISABLE_EXECUTE) {
+			fault_type = PKEY_DISABLE_EXECUTE;
+			remaining_faults = 1;
+		}
+
+		FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+		printf("execute at %p, pkey permissions are %s\n", fault_addr,
+		       pkey_rights(rights));
+		asm volatile("mtctr	%0; bctrl" : : "r"(insns) : "ctr", "lr");
+		FAIL_IF(remaining_faults != 0);
+		if (rights & PKEY_DISABLE_EXECUTE)
+			FAIL_IF(fault_code != SEGV_PKUERR);
+
+		/* Free the current pkey */
+		sys_pkey_free(pkey);
+		/* Find next valid combination of pkey rights */
+		rights = next_pkey_rights(rights);
+	} while (rights);
+
+	/* Cleanup */
+	munmap((void *) insns, pgsize);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test, "pkey_exec_prot");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
new file mode 100644
index 0000000..2db76e5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if the signal information reports the correct memory protection
+ * key upon getting a key access violation fault for a page that was
+ * attempted to be protected by two different keys from two competing
+ * threads at the same time.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP	0x60000000
+#define PPC_INST_BLR	0x4e800020
+#define PROT_RWX	(PROT_READ | PROT_WRITE | PROT_EXEC)
+
+#define NUM_ITERATIONS	1000000
+
+static volatile sig_atomic_t perm_pkey, rest_pkey;
+static volatile sig_atomic_t rights, fault_count;
+static volatile unsigned int *volatile fault_addr;
+static pthread_barrier_t iteration_barrier;
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	void *pgstart;
+	size_t pgsize;
+	int pkey;
+
+	pkey = siginfo_pkey(sinfo);
+
+	/* Check if this fault originated from a pkey access violation */
+	if (sinfo->si_code != SEGV_PKUERR) {
+		sigsafe_err("got a fault for an unexpected reason\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the restrictive pkey */
+	if (pkey != rest_pkey) {
+		sigsafe_err("got a fault for an unexpected pkey\n");
+		_exit(1);
+	}
+
+	/* Check if too many faults have occurred for the same iteration */
+	if (fault_count > 0) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+	pgsize = getpagesize();
+	pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1));
+
+	/*
+	 * If the current fault occurred due to lack of execute rights,
+	 * reassociate the page with the exec-only pkey since execute
+	 * rights cannot be changed directly for the faulting pkey as
+	 * IAMR is inaccessible from userspace.
+	 *
+	 * Otherwise, if the current fault occurred due to lack of
+	 * read-write rights, change the AMR permission bits for the
+	 * pkey.
+	 *
+	 * This will let the test continue.
+	 */
+	if (rights == PKEY_DISABLE_EXECUTE &&
+	    mprotect(pgstart, pgsize, PROT_EXEC))
+		_exit(1);
+	else	/* NOTE(review): also reached when the mprotect() above succeeds */
+		pkey_set_rights(pkey, 0);
+
+	fault_count++;	/* a second fault before this is reset is rejected above */
+}
+
+struct region {
+	unsigned long rights;
+	unsigned int *base;
+	size_t size;
+};
+
+static void *protect(void *p)
+{
+	unsigned long rights;
+	unsigned int *base;
+	size_t size;
+	int tid, i;
+
+	tid = gettid();
+	base = ((struct region *) p)->base;
+	size = ((struct region *) p)->size;
+	FAIL_IF_EXIT(!base);
+
+	/* No read, write and execute restrictions */
+	rights = 0;
+
+	printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+
+	/* Allocate the permissive pkey */
+	perm_pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF_EXIT(perm_pkey < 0);
+
+	/*
+	 * Repeatedly try to protect the common region with a permissive
+	 * pkey
+	 */
+	for (i = 0; i < NUM_ITERATIONS; i++) {
+		/*
+		 * Wait until the other thread has finished allocating the
+		 * restrictive pkey or until the next iteration has begun
+		 */
+		pthread_barrier_wait(&iteration_barrier);
+
+		/* Try to associate the permissive pkey with the region */
+		FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+					       perm_pkey));
+	}
+
+	/* Free the permissive pkey */
+	sys_pkey_free(perm_pkey);
+
+	return NULL;
+}
+
+static void *protect_access(void *p)
+{
+	size_t size, numinsns;
+	unsigned int *base;
+	int tid, i;
+
+	tid = gettid();
+	base = ((struct region *) p)->base;
+	size = ((struct region *) p)->size;
+	rights = ((struct region *) p)->rights;
+	numinsns = size / sizeof(base[0]);
+	FAIL_IF_EXIT(!base);
+
+	/* Allocate the restrictive pkey */
+	rest_pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF_EXIT(rest_pkey < 0);
+
+	printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+	printf("tid %d, %s randomly in range [%p, %p]\n", tid,
+	       (rights == PKEY_DISABLE_EXECUTE) ? "execute" :
+	       (rights == PKEY_DISABLE_WRITE)  ? "write" : "read",
+	       base, base + numinsns);
+
+	/*
+	 * Repeatedly try to protect the common region with a restrictive
+	 * pkey and read, write or execute from it
+	 */
+	for (i = 0; i < NUM_ITERATIONS; i++) {
+		/*
+		 * Wait until the other thread has finished allocating the
+		 * permissive pkey or until the next iteration has begun
+		 */
+		pthread_barrier_wait(&iteration_barrier);
+
+		/* Try to associate the restrictive pkey with the region */
+		FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+					       rest_pkey));
+
+		/* Choose a random instruction word address from the region */
+		fault_addr = base + (rand() % numinsns);
+		fault_count = 0;
+
+		switch (rights) {
+		/* Read protection test */
+		case PKEY_DISABLE_ACCESS:
+			/*
+			 * Read an instruction word from the region and
+			 * verify if it has not been overwritten to
+			 * something unexpected
+			 */
+			FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP &&
+				     *fault_addr != PPC_INST_BLR);
+			break;
+
+		/* Write protection test */
+		case PKEY_DISABLE_WRITE:
+			/*
+			 * Write an instruction word to the region and
+			 * verify if the overwrite has succeeded
+			 */
+			*fault_addr = PPC_INST_BLR;
+			FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR);
+			break;
+
+		/* Execute protection test */
+		case PKEY_DISABLE_EXECUTE:
+			/* Jump to the region and execute instructions */
+			asm volatile(
+				"mtctr	%0; bctrl"
+				: : "r"(fault_addr) : "ctr", "lr");
+			break;
+		}
+
+		/*
+		 * Restore the restrictions originally imposed by the
+		 * restrictive pkey as the signal handler would have
+		 * cleared out the corresponding AMR bits
+		 */
+		pkey_set_rights(rest_pkey, rights);
+	}
+
+	/* Free restrictive pkey */
+	sys_pkey_free(rest_pkey);
+
+	return NULL;
+}
+
+static void reset_pkeys(unsigned long rights)
+{
+	int pkeys[NR_PKEYS], i;
+
+	/* Try to allocate NR_PKEYS pkeys, each with the given initial rights */
+	for (i = 0; i < NR_PKEYS; i++)
+		pkeys[i] = sys_pkey_alloc(0, rights);	/* result is not checked */
+
+	/* Free every slot again */
+	for (i = 0; i < NR_PKEYS; i++)
+		sys_pkey_free(pkeys[i]);	/* NOTE(review): also runs for failed allocations */
+}
+
+static int test(void)
+{
+	pthread_t prot_thread, pacc_thread;
+	struct sigaction act;
+	pthread_attr_t attr;
+	size_t numinsns;
+	struct region r;
+	int ret, i;
+
+	srand(time(NULL));
+	ret = pkeys_unsupported();
+	if (ret)
+		return ret;
+
+	/* Allocate the region */
+	r.size = getpagesize();
+	r.base = mmap(NULL, r.size, PROT_RWX,
+		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(r.base == MAP_FAILED);
+
+	/*
+	 * Fill the region with no-ops with a branch at the end
+	 * for returning to the caller
+	 */
+	numinsns = r.size / sizeof(r.base[0]);
+	for (i = 0; i < numinsns - 1; i++)
+		r.base[i] = PPC_INST_NOP;
+	r.base[i] = PPC_INST_BLR;
+
+	/* Setup SIGSEGV handler */
+	act.sa_handler = 0;
+	act.sa_sigaction = segv_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
+	act.sa_flags = SA_SIGINFO;
+	act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+	/*
+	 * For these tests, the parent process should clear all bits of
+	 * AMR and IAMR, i.e. impose no restrictions, for all available
+	 * pkeys. This will be the base for the initial AMR and IAMR
+	 * values for all the test thread pairs.
+	 *
+	 * If the AMR and IAMR bits of all available pkeys are cleared
+	 * before running the tests and a fault is generated when
+	 * attempting to read, write or execute instructions from a
+	 * pkey protected region, the pkey responsible for this must be
+	 * the one from the protect-and-access thread since the other
+	 * one is fully permissive. Despite that, if the pkey reported
+	 * by siginfo is not the restrictive pkey, then there must be a
+	 * kernel bug.
+	 */
+	reset_pkeys(0);
+
+	/* Setup barrier for protect and protect-and-access threads */
+	FAIL_IF(pthread_attr_init(&attr) != 0);
+	FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0);
+
+	/* Setup and start protect and protect-and-read threads */
+	puts("starting thread pair (protect, protect-and-read)");
+	r.rights = PKEY_DISABLE_ACCESS;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Setup and start protect and protect-and-write threads */
+	puts("starting thread pair (protect, protect-and-write)");
+	r.rights = PKEY_DISABLE_WRITE;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Setup and start protect and protect-and-execute threads */
+	puts("starting thread pair (protect, protect-and-execute)");
+	r.rights = PKEY_DISABLE_EXECUTE;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Cleanup */
+	FAIL_IF(pthread_attr_destroy(&attr) != 0);
+	FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0);
+	munmap(r.base, r.size);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test, "pkey_siginfo");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
index e2eed65..30b71b1 100644
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
+#include <unistd.h>
 
 #include <asm/cputable.h>
 
@@ -18,8 +19,13 @@
 {
 	char *p;
 
-	/* 2.06 or later should support SAO */
-	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+	/*
+	 * SAO was introduced in 2.06 and removed in 3.1. It's disabled in
+	 * guests/LPARs by default, so also skip if we are running in a guest.
+	 */
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) ||
+		have_hwcap2(PPC_FEATURE2_ARCH_3_1) ||
+		access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0);
 
 	/*
 	 * Ensure we can ask for PROT_SAO.
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 0000000..ed91439
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+volatile char c;
+
+enum access_type {
+	LOAD,
+	STORE,
+};
+
+/*
+ * Consume stack until the stack pointer is below @target_sp, then do an access
+ * (load or store) at offset @delta from either the base of the stack or the
+ * current stack pointer.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+	unsigned long target;
+	char stack_cur;
+
+	if ((unsigned long)&stack_cur > target_sp)
+		return consume_stack(target_sp, stack_high, delta, type);
+	else {
+		// We don't really need this, but without it GCC might not
+		// generate a recursive call above.
+		stack_top_ptr = &stack_cur;
+
+#ifdef __powerpc__
+		asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+		asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+		target = stack_high - delta + 1;
+		volatile char *p = (char *)target;
+
+		if (type == STORE)
+			*p = c;
+		else
+			c = *p;
+
+		// Do something to prevent the stack frame being popped prior to
+		// our access above.
+		getpid();
+	}
+
+	return 0;
+}
+
+// Find the first named /proc/self/maps entry containing @needle: 0 if found, else -1.
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+	unsigned long start, end;
+	static char buf[4096];
+	char name[128];
+	FILE *f;
+	int fields, rc = -1;	// rc stays -1 unless a matching mapping is found
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), f)) {
+		fields = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+				&start, &end, name);
+		if (fields == 2)	// no name was parsed for this line, so it cannot match
+			continue;
+
+		if (fields != 3) {
+			printf("sscanf errored\n");
+			break;
+		}
+
+		if (strstr(name, needle)) {
+			*low = start;
+			*high = end - 1;
+			rc = 0;
+			break;
+		}
+	}
+
+	fclose(f);
+
+	return rc;
+}
+
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+	unsigned long low, stack_high;
+
+	assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+	assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+	printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+	       type == LOAD ? "load" : "store", delta, stack_used, stack_high,
+	       stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+	return 0;
+}
+
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+	pid_t pid;
+	int rc;
+
+	pid = fork();
+	if (pid == 0)
+		exit(child(stack_used, delta, type));
+
+	assert(waitpid(pid, &rc, 0) != -1);
+
+	if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0)
+		return 0;
+
+	// We don't expect a non-zero exit that's not a signal
+	assert(!WIFEXITED(rc));
+
+	printf("Faulted:   %s delta %-7d used size 0x%06x signal %d\n",
+	       type == LOAD ? "load" : "store", delta, stack_used,
+	       WTERMSIG(rc));
+
+	return 1;
+}
+
+// This is fairly arbitrary but is well below any of the targets below,
+// so that the delta between the stack pointer and the target is large.
+#define DEFAULT_SIZE	(32 * _KB)
+
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+	unsigned long delta;
+
+	// We should be able to access anywhere within the rlimit
+	for (delta = page_size; delta <= rlim_cur; delta += page_size)
+		assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
+	assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+
+	// But if we go past the rlimit it should fail
+	assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+}
+
+// Run the load and store sweeps against the current RLIMIT_STACK soft limit.
+static int test(void)
+{
+	unsigned long page_size;
+	struct rlimit rlimit;
+
+	page_size = getpagesize();
+	assert(getrlimit(RLIMIT_STACK, &rlimit) == 0);	// never sweep an unset limit
+	printf("Stack rlimit is 0x%lx\n", (unsigned long)rlimit.rlim_cur);
+
+	printf("Testing loads ...\n");
+	test_one_type(LOAD, page_size, rlimit.rlim_cur);
+	printf("Testing stores ...\n");
+	test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+	printf("All OK\n");
+	return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+
+int main(void)
+{
+	return test_harness(test, "stack_expansion_ldst");
+}
+#else
+int main(void)
+{
+	return test();
+}
+#endif
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 0000000..c8b32a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+static char *stack_base_ptr;
+static char *stack_top_ptr;
+
+static volatile sig_atomic_t sig_occurred = 0;
+
+/* SIGUSR1 handler: note that the signal arrived by setting sig_occurred. */
+static void sigusr1_handler(int signo)
+{
+	(void)signo;	// the signal number itself is not needed
+
+	sig_occurred = 1;
+}
+
+/*
+ * Recurse until the distance between stack_base_ptr and a local variable of
+ * the current frame is at least stack_size bytes.  At that depth the local's
+ * address is recorded in stack_top_ptr, the parent is notified through
+ * write_pipe, and the function spins until sig_occurred becomes non-zero
+ * (it is set by sigusr1_handler()).
+ *
+ * Returns 0 once the signal has been seen.  A notify_parent() failure is
+ * handled by FAIL_IF() (presumably an early non-zero return - see utils.h).
+ */
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+	char stack_cur;
+
+	// Not deep enough yet: add another frame
+	if ((stack_base_ptr - &stack_cur) < stack_size)
+		return consume_stack(stack_size, write_pipe);
+	else {
+		stack_top_ptr = &stack_cur;
+
+		FAIL_IF(notify_parent(write_pipe));
+
+		// Wait here, at the requested depth, for the signal
+		while (!sig_occurred)
+			barrier();
+	}
+
+	return 0;
+}
+
+/*
+ * Body of the forked child: install the SIGUSR1 handler, record the stack
+ * base (the address of a local rounded up to the next 64K boundary) and
+ * call consume_stack() to use stack_size bytes of stack and wait for the
+ * signal.
+ *
+ * Returns 0 on success.  A sigaction() failure exits through err(); a
+ * consume_stack() failure is handled by FAIL_IF().
+ */
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+	struct sigaction act;
+	char stack_base;
+
+	act.sa_handler = sigusr1_handler;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = 0;
+	if (sigaction(SIGUSR1, &act, NULL) < 0)
+		err(1, "sigaction");
+
+	stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+	FAIL_IF(consume_stack(stack_size, write_pipe));
+
+	// %p takes void * and %zx takes size_t, so convert explicitly
+	printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+		stack_size, (void *)stack_base_ptr, (void *)stack_top_ptr,
+		(size_t)(stack_base_ptr - stack_top_ptr));
+
+	return 0;
+}
+
+/*
+ * Fork a child that consumes stack_size bytes of stack, wait until it
+ * reports that it is at that depth, send it SIGUSR1 and collect its exit
+ * status.
+ *
+ * Returns 0 on success; every failure is reported through FAIL_IF().
+ */
+static int test_one_size(unsigned int stack_size)
+{
+	union pipe read_pipe, write_pipe;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	// Never carry pid == -1 forward: kill(-1, ...) targets every process
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		close(read_pipe.read_fd);
+		close(write_pipe.write_fd);
+		// The parent reads from read_pipe, so the child writes to it
+		exit(child(stack_size, read_pipe));
+	}
+
+	close(read_pipe.write_fd);
+	close(write_pipe.read_fd);
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	kill(pid, SIGUSR1);
+
+	FAIL_IF(wait_for_child(pid));
+
+	close(read_pipe.read_fd);
+	close(write_pipe.write_fd);
+
+	return 0;
+}
+
+/*
+ * Run test_one_size() for used-stack sizes from 1MB - 64K up to (but not
+ * including) 1MB + 64K.  Returns 0 if every size passes.
+ */
+int test(void)
+{
+	// Smallest size tested: 1MB - 64K
+	const unsigned int start = (1 * _MB) - (64 * _KB);
+	unsigned int step;
+
+	// Step by 64 bytes to get more coverage of odd sizes
+	for (step = 0; step < (128 * _KB); step += 64) {
+		FAIL_IF(test_one_size(start + step));
+	}
+
+	return 0;
+}
+
+/* Hand test() to test_harness() and use its result as the exit status. */
+int main(void)
+{
+	int rc = test_harness(test, "stack_expansion_signal");
+
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
new file mode 100644
index 0000000..5a71184
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="nxgzip", KERNEL=="nx-gzip", MODE="0666"
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
new file mode 100644
index 0000000..640fad6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -0,0 +1,8 @@
+# Build flags for the nx-gzip tests; headers are looked up in ./include
+CFLAGS = -O3 -m64 -I./include
+
+# Variables consumed by lib.mk (included below)
+TEST_GEN_FILES := gzfht_test gunz_test
+TEST_PROGS := nx-gzip-test.sh
+
+include ../../lib.mk
+
+# Both generated programs list gzip_vas.c as an extra prerequisite
+$(TEST_GEN_FILES): gzip_vas.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/README b/tools/testing/selftests/powerpc/nx-gzip/README
new file mode 100644
index 0000000..9809dba
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/README
@@ -0,0 +1,45 @@
+Test the nx-gzip function:
+=========================
+
+Verify that the following device exists:
+  /dev/crypto/nx-gzip
+If you get a permission error run as sudo or set the device permissions:
+   sudo chmod go+rw /dev/crypto/nx-gzip
+However, chmod may not survive across boots. You may create a udev file such
+as:
+   /etc/udev/rules.d/99-nx-gzip.rules
+
+
+To manually build and run:
+$ gcc -O3 -I./include -o gzfht_test gzfht_test.c gzip_vas.c
+$ gcc -O3 -I./include -o gunz_test gunz_test.c gzip_vas.c
+
+
+Compress any file using Fixed Huffman mode. Output will have a .nx.gz suffix:
+$ ./gzfht_test gzip_vas.c
+file gzip_vas.c read, 6413 bytes
+compressed 6413 to 3124 bytes total, crc32 checksum = abd15e8a
+
+
+Uncompress the previous output. Output will have a .nx.gunzip suffix:
+$ ./gunz_test gzip_vas.c.nx.gz
+gzHeader FLG 0
+00 00 00 00 04 03
+gzHeader MTIME, XFL, OS ignored
+computed checksum abd15e8a isize 0000190d
+stored   checksum abd15e8a isize 0000190d
+decomp is complete: fclose
+
+
+Compare two files:
+$ sha1sum gzip_vas.c.nx.gz.nx.gunzip gzip_vas.c
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6  gzip_vas.c.nx.gz.nx.gunzip
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6  gzip_vas.c
+
+
+Note that the code here is intended for testing the nx-gzip hardware function.
+It is not intended for demonstrating performance or compression ratio.
+By being simplistic these selftests expect to allocate the entire set of source
+and target pages in memory, so they need enough memory to work.
+For more information and source code consider using:
+https://github.com/libnxz/power-gzip
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
new file mode 100644
index 0000000..7c23d3d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
@@ -0,0 +1,1028 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gunzip sample code for demonstrating the P9 NX hardware
+ * interface.  Not intended for production use or for performance or
+ * compression ratio measurements.  Note also that /dev/crypto/nx-gzip,
+ * VAS and skiboot support are required
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ * Definitions of acronyms used here.  See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE	// For aligned_alloc()
+#define _DEFAULT_SOURCE	// For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+#include "crb.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y))?(X):(Y))
+#define NX_MAX(X, Y) (((X) > (Y))?(X):(Y))
+
+#define GETINPC(X) fgetc(X)
+#define FNAME_MAX 1024
+
+/*
+ * fifo queue management
+ *
+ * In the macros below "cur" is the offset where the used bytes start,
+ * "used" is the number of bytes held and "len" is the size of the buffer.
+ * A region that wraps past the end of the buffer is described in two
+ * parts: "first" is the part that ends at the end of the buffer and "last"
+ * is the part that lies at the start of the buffer.
+ */
+#define fifo_used_bytes(used) (used)
+#define fifo_free_bytes(used, len) ((len)-(used))
+/* amount of free bytes in the first and last parts */
+#define fifo_free_first_bytes(cur, used, len)  ((((cur)+(used)) <= (len)) \
+						  ? (len)-((cur)+(used)) : 0)
+#define fifo_free_last_bytes(cur, used, len)   ((((cur)+(used)) <= (len)) \
+						  ? (cur) : (len)-(used))
+/* amount of used bytes in the first and last parts */
+#define fifo_used_first_bytes(cur, used, len)  ((((cur)+(used)) <= (len)) \
+						  ? (used) : (len)-(cur))
+#define fifo_used_last_bytes(cur, used, len)   ((((cur)+(used)) <= (len)) \
+						  ? 0 : ((used)+(cur))-(len))
+/* first and last free parts start here */
+#define fifo_free_first_offset(cur, used)      ((cur)+(used))
+/* the last free part begins right after any wrapped-around used bytes */
+#define fifo_free_last_offset(cur, used, len)  \
+					   fifo_used_last_bytes(cur, used, len)
+/* first and last used parts start here */
+#define fifo_used_first_offset(cur)            (cur)
+#define fifo_used_last_offset(cur)             (0)
+
+const int fifo_in_len = 1<<24;
+const int fifo_out_len = 1<<24;
+const int page_sz = 1<<16;
+const int line_sz = 1<<7;
+const int window_max = 1<<15;
+
+/*
+ * Adds an (address, len) pair to the list of ddes (ddl) and updates
+ * the base dde.  ddl[0] is the only dde in a direct dde which
+ * contains a single (addr,len) pair.  For more pairs, ddl[0] becomes
+ * the indirect (base) dde that points to a list of direct ddes.
+ * See Section 6.4 of the NX-gzip user manual for DDE description.
+ * Addr=NULL, len=0 clears the ddl[0].  Returns the total number of
+ * bytes in ddl.  Caller is responsible for allocating the array of
+ * nx_dde_t *ddl.  If N addresses are required in the scatter-gather
+ * list, the ddl array must have N+1 entries minimum; this function
+ * does not check the array size.
+ */
+static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
+					uint32_t len)
+{
+	uint32_t ddecnt;
+	uint32_t bytes;
+
+	if (addr == NULL && len == 0) {
+		clearp_dde(ddl);
+		return 0;
+	}
+
+	/* Argument order must follow the format: %s is __func__, %p is addr */
+	NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, __func__,
+			addr, len));
+
+	/* Number of ddes in the dde list ; == 0 when it is a direct dde */
+	ddecnt = getpnn(ddl, dde_count);
+	bytes = getp32(ddl, ddebc);
+
+	if (ddecnt == 0 && bytes == 0) {
+		/* First dde is unused; make it a direct dde */
+		bytes = len;
+		putp32(ddl, ddebc, bytes);
+		putp64(ddl, ddead, (uint64_t) addr);
+	} else if (ddecnt == 0) {
+		/* Converting direct to indirect dde
+		 * ddl[0] becomes head dde of ddl
+		 * copy direct to indirect first.
+		 */
+		ddl[1] = ddl[0];
+
+		/* Add the new dde next */
+		clear_dde(ddl[2]);
+		put32(ddl[2], ddebc, len);
+		put64(ddl[2], ddead, (uint64_t) addr);
+
+		/* Ddl head points to 2 direct ddes */
+		ddecnt = 2;
+		putpnn(ddl, dde_count, ddecnt);
+		bytes = bytes + len;
+		putp32(ddl, ddebc, bytes);
+		/* Pointer to the first direct dde */
+		putp64(ddl, ddead, (uint64_t) &ddl[1]);
+	} else {
+		/* Append a dde to an existing indirect ddl */
+		++ddecnt;
+		clear_dde(ddl[ddecnt]);
+		put64(ddl[ddecnt], ddead, (uint64_t) addr);
+		put32(ddl[ddecnt], ddebc, len);
+
+		putpnn(ddl, dde_count, ddecnt);
+		bytes = bytes + len;
+		putp32(ddl, ddebc, bytes); /* byte sum of all dde */
+	}
+	return bytes;
+}
+
+/*
+ * Touch specified number of pages represented in number bytes
+ * beginning from the first buffer in a dde list.
+ * Do not touch the pages past buf_sz-th byte's page.
+ *
+ * Set buf_sz = 0 to touch all pages described by the ddep.
+ *
+ * wr is passed through to nxu_touch_pages().  Note that the page_sz
+ * parameter hides the file-scope constant of the same name.
+ *
+ * Returns ERR_NX_OK, or ERR_NX_EXCESSIVE_DDE when an indirect dde
+ * reports more than MAX_DDE_COUNT entries.
+ */
+static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
+				int wr)
+{
+	uint32_t indirect_count;
+	uint32_t buf_len;
+	long total;
+	uint64_t buf_addr;
+	struct nx_dde_t *dde_list;
+	uint32_t i;	/* same type as indirect_count it is compared with */
+
+	assert(!!ddep);
+
+	indirect_count = getpnn(ddep, dde_count);
+
+	NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
+			indirect_count));
+	NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
+
+	if (indirect_count == 0) {
+		/* Direct dde */
+		buf_len = getp32(ddep, ddebc);
+		buf_addr = getp64(ddep, ddead);
+
+		NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
+				buf_len, (void *)buf_addr));
+
+		if (buf_sz == 0)
+			nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+		else
+			nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
+					buf_sz), page_sz, wr);
+
+		return ERR_NX_OK;
+	}
+
+	/* Indirect dde */
+	if (indirect_count > MAX_DDE_COUNT)
+		return ERR_NX_EXCESSIVE_DDE;
+
+	/* First address of the list */
+	dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
+
+	if (buf_sz == 0)
+		buf_sz = getp32(ddep, ddebc);
+
+	total = 0;
+	for (i = 0; i < indirect_count; i++) {
+		buf_len = get32(dde_list[i], ddebc);
+		buf_addr = get64(dde_list[i], ddead);
+		total += buf_len;
+
+		NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
+				buf_len, (void *)buf_addr));
+		NXPRT(fprintf(stderr, "0x%lx\n", total));
+
+		/* Touching fewer pages than encoded in the ddebc */
+		if (total > buf_sz) {
+			/* total - buf_sz is how far this buffer runs past
+			 * the buf_sz-th byte; drop that excess so only the
+			 * part up to the limit is touched.  NX_MIN keeps
+			 * the subtraction from wrapping.
+			 */
+			buf_len -= NX_MIN(buf_len, total - buf_sz);
+			if (buf_len > 0)
+				nxu_touch_pages((void *)buf_addr, buf_len,
+						page_sz, wr);
+			NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
+				      buf_len));
+			NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
+			break;
+		}
+		nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+	}
+	return ERR_NX_OK;
+}
+
+/*
+ * Fill in the request block held in cmdp and hand it to the accelerator.
+ * src and dst are the heads of the source and target scatter gather lists;
+ * the NX function code and the remaining parameters must already be set
+ * in cmdp.  Returns whatever nxu_submit_job() returns.
+ */
+static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
+			 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	uint64_t csb_ptr;
+
+	/* Start from an all-zero status block */
+	memset(&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+	/* The list heads are copied by value into the request block */
+	cmdp->crb.target_dde = *dst;
+	cmdp->crb.source_dde = *src;
+
+	/* Status, output byte count in tpbc */
+	csb_ptr = (uint64_t) &cmdp->crb.csb;
+	csb_ptr &= csb_address_mask;
+	put64(cmdp->crb, csb_address, csb_ptr);
+
+	/* NX reports input bytes in spbc; cleared */
+	cmdp->cpb.out_spbc_decomp = 0;
+	cmdp->cpb.out_spbc_comp_with_count = 0;
+	cmdp->cpb.out_spbc_comp_wrap = 0;
+
+	/* Clear output */
+	put32(cmdp->cpb, out_adler, INIT_ADLER);
+	put32(cmdp->cpb, out_crc, INIT_CRC);
+
+	/* Submit the crb, the job descriptor, to the accelerator. */
+	return nxu_submit_job(cmdp, handle);
+}
+
+/*
+ * Decompress one gzip stream with the NX accelerator.
+ *
+ * With one file argument the stream is read from argv[1] and written to
+ * "<last path component of argv[1]>.nx.gunzip"; otherwise it is read from
+ * stdin and written to stdout.  The gzip header is parsed here in software,
+ * the deflate data is passed to NX through the fifo_in/fifo_out buffers,
+ * and the crc32/isize stored at the end of the stream are compared with
+ * the values computed during decompression.
+ *
+ * devhandle is passed unchanged to nx_submit_job().
+ *
+ * Returns 0 when checksum and size match, -1 on any error.  Both files are
+ * closed and the buffers released before returning.
+ */
+int decompress_file(int argc, char **argv, void *devhandle)
+{
+	FILE *inpf = NULL;
+	FILE *outf = NULL;
+
+	int c, expect, i, cc, rc = 0;
+	int flg;
+	char gzfname[FNAME_MAX];
+
+	/* Queuing, file ops, byte counting */
+	char *fifo_in = NULL, *fifo_out = NULL;
+	char *fifo_out_base = NULL; /* pointer to hand back to free() */
+	int used_in, cur_in, used_out, cur_out, read_sz, n;
+	int first_free, last_free, first_used, last_used;
+	int first_offset, last_offset;
+	int write_sz, free_space, source_sz;
+	int source_sz_estimate, target_sz_estimate;
+	int target_max;
+	uint64_t last_comp_ratio = 0; /* 1000 max */
+	uint64_t total_out = 0;
+	int is_final, is_eof;
+
+	/* nx hardware */
+	int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
+	int history_len = 0;
+	struct nx_gzip_crb_cpb_t cmd, *cmdp;
+	struct nx_dde_t *ddl_in;
+	struct nx_dde_t dde_in[6] __aligned(128);
+	struct nx_dde_t *ddl_out;
+	struct nx_dde_t dde_out[6] __aligned(128);
+	int pgfault_retries;
+
+	/* when using mmap'ed files; must start at 0 as it is only added to */
+	off_t input_file_offset = 0;
+
+	if (argc > 2) {
+		fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
+		fprintf(stderr, "    writes to stdout or <fname>.nx.gunzip\n");
+		return -1;
+	}
+
+	if (argc == 2) {
+		char w[1024];
+		char *wp;
+		int len;
+
+		inpf = fopen(argv[1], "r");
+		if (inpf == NULL) {
+			perror(argv[1]);
+			return -1;
+		}
+
+		/* Make a new file name to write to.  Ignoring '.gz' */
+		wp = strrchr(argv[1], '/');
+		wp = (wp != NULL) ? (wp + 1) : argv[1];
+
+		/* argv[1] may be longer than w, so the copy is bounded */
+		len = snprintf(w, sizeof(w), "%s.nx.gunzip", wp);
+		if (len < 0 || (size_t)len >= sizeof(w)) {
+			fprintf(stderr, "%s: file name too long\n", argv[1]);
+			fclose(inpf);
+			return -1;
+		}
+
+		outf = fopen(w, "w");
+		if (outf == NULL) {
+			perror(w);
+			fclose(inpf);
+			return -1;
+		}
+	} else {
+		/* No file argument (this also covers an empty argv) */
+		inpf = stdin;
+		outf = stdout;
+	}
+
+	/* Decode the gzip header */
+	c = GETINPC(inpf); expect = 0x1f; /* ID1 */
+	if (c != expect)
+		goto err1;
+
+	c = GETINPC(inpf); expect = 0x8b; /* ID2 */
+	if (c != expect)
+		goto err1;
+
+	c = GETINPC(inpf); expect = 0x08; /* CM */
+	if (c != expect)
+		goto err1;
+
+	flg = GETINPC(inpf); /* FLG */
+
+	/* Reserved bits, FEXTRA (0x4) and EOF are all rejected here */
+	if (flg & 0xE0 || flg & 0x4 || flg == EOF)
+		goto err2;
+
+	fprintf(stderr, "gzHeader FLG %x\n", flg);
+
+	/* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
+	 * sample code.
+	 */
+	for (i = 0; i < 6; i++) {
+		/* Keep the value as int: a char cannot hold EOF reliably */
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		fprintf(stderr, "%02x ", c);
+		if (i == 5)
+			fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
+
+	/* FNAME */
+	if (flg & 0x8) {
+		int k = 0;
+
+		do {
+			c = GETINPC(inpf);
+			if (c == EOF || k >= FNAME_MAX)
+				goto err3;
+			gzfname[k++] = c;
+		} while (c);
+		fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
+	}
+
+	/* FCOMMENT: a zero-terminated comment follows FNAME (RFC 1952).
+	 * It has to be skipped or it would be fed to NX as deflate data.
+	 */
+	if (flg & 0x10) {
+		do {
+			c = GETINPC(inpf);
+			if (c == EOF)
+				goto err3;
+		} while (c);
+		fprintf(stderr, "gzHeader FCOMMENT: ignored\n");
+	}
+
+	/* FHCRC */
+	if (flg & 0x2) {
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		fprintf(stderr, "gzHeader FHCRC: ignored\n");
+	}
+
+	used_in = cur_in = used_out = cur_out = 0;
+	is_final = is_eof = 0;
+
+	/* Allocate one page larger to prevent page faults due to NX
+	 * overfetching.
+	 * Either do this (char*)(uintptr_t)aligned_alloc or use
+	 * -std=c11 flag to make the int-to-pointer warning go away.
+	 * The calls are kept out of assert() so that they still happen
+	 * when NDEBUG is defined.
+	 */
+	fifo_in = (char *)(uintptr_t)aligned_alloc(line_sz,
+				   fifo_in_len + page_sz);
+	fifo_out_base = (char *)(uintptr_t)aligned_alloc(line_sz,
+				   fifo_out_len + page_sz + line_sz);
+	if (fifo_in == NULL || fifo_out_base == NULL) {
+		fprintf(stderr, "error: cannot allocate the fifo buffers\n");
+		rc = -1;
+		goto out;
+	}
+	/* Leave unused space due to history rounding rules */
+	fifo_out = fifo_out_base + line_sz;
+	nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
+
+	ddl_in  = &dde_in[0];
+	ddl_out = &dde_out[0];
+	cmdp = &cmd;
+	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+read_state:
+
+	/* Read from .gz file */
+
+	NXPRT(fprintf(stderr, "read_state:\n"));
+
+	if (is_eof != 0)
+		goto write_state;
+
+	/* We read in to fifo_in in two steps: first: read in to from
+	 * cur_in to the end of the buffer.  last: if free space wrapped
+	 * around, read from fifo_in offset 0 to offset cur_in.
+	 */
+
+	/* Reset fifo head to reduce unnecessary wrap arounds */
+	cur_in = (used_in == 0) ? 0 : cur_in;
+
+	/* Free space total is reduced by a gap */
+	free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
+			    - line_sz);
+
+	/* Free space may wrap around as first and last */
+	first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
+	last_free  = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
+
+	/* Start offsets of the free memory */
+	first_offset = fifo_free_first_offset(cur_in, used_in);
+	last_offset  = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
+
+	/* Reduce read_sz because of the line_sz gap */
+	read_sz = NX_MIN(free_space, first_free);
+	n = 0;
+	if (read_sz > 0) {
+		/* Read in to offset cur_in + used_in */
+		n = fread(fifo_in + first_offset, 1, read_sz, inpf);
+		used_in = used_in + n;
+		free_space = free_space - n;
+		assert(n <= read_sz);
+		if (n != read_sz) {
+			/* Either EOF or error; exit the read loop */
+			is_eof = 1;
+			goto write_state;
+		}
+	}
+
+	/* If free space wrapped around */
+	if (last_free > 0) {
+		/* Reduce read_sz because of the line_sz gap */
+		read_sz = NX_MIN(free_space, last_free);
+		n = 0;
+		if (read_sz > 0) {
+			n = fread(fifo_in + last_offset, 1, read_sz, inpf);
+			used_in = used_in + n;       /* Increase used space */
+			free_space = free_space - n; /* Decrease free space */
+			assert(n <= read_sz);
+			if (n != read_sz) {
+				/* Either EOF or error; exit the read loop */
+				is_eof = 1;
+				goto write_state;
+			}
+		}
+	}
+
+	/* At this point we have used_in bytes in fifo_in with the
+	 * data head starting at cur_in and possibly wrapping around.
+	 */
+
+write_state:
+
+	/* Write decompressed data to output file */
+
+	NXPRT(fprintf(stderr, "write_state:\n"));
+
+	if (used_out == 0)
+		goto decomp_state;
+
+	/* If fifo_out has data waiting, write it out to the file to
+	 * make free target space for the accelerator used bytes in
+	 * the first and last parts of fifo_out.
+	 */
+
+	first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
+	last_used  = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
+
+	write_sz = first_used;
+
+	n = 0;
+	if (write_sz > 0) {
+		n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
+		used_out = used_out - n;
+		/* Move head of the fifo */
+		cur_out = (cur_out + n) % fifo_out_len;
+		assert(n <= write_sz);
+		if (n != write_sz) {
+			fprintf(stderr, "error: write\n");
+			rc = -1;
+			goto err5;
+		}
+	}
+
+	if (last_used > 0) { /* If more data available in the last part */
+		write_sz = last_used; /* Keep it here for later */
+		n = 0;
+		if (write_sz > 0) {
+			n = fwrite(fifo_out, 1, write_sz, outf);
+			used_out = used_out - n;
+			cur_out = (cur_out + n) % fifo_out_len;
+			assert(n <= write_sz);
+			if (n != write_sz) {
+				fprintf(stderr, "error: write\n");
+				rc = -1;
+				goto err5;
+			}
+		}
+	}
+
+decomp_state:
+
+	/* NX decompresses input data */
+
+	NXPRT(fprintf(stderr, "decomp_state:\n"));
+
+	if (is_final)
+		goto finish_state;
+
+	/* Address/len lists */
+	clearp_dde(ddl_in);
+	clearp_dde(ddl_out);
+
+	/* FC, CRC, HistLen, Table 6-6 */
+	if (resuming) {
+		/* Resuming a partially decompressed input.
+		 * The key to resume is supplying the 32KB
+		 * dictionary (history) to NX, which is basically
+		 * the last 32KB of output produced.
+		 */
+		fc = GZIP_FC_DECOMPRESS_RESUME;
+
+		cmdp->cpb.in_crc   = cmdp->cpb.out_crc;
+		cmdp->cpb.in_adler = cmdp->cpb.out_adler;
+
+		/* Round up the history size to quadword.  Section 2.10 */
+		history_len = (history_len + 15) / 16;
+		putnn(cmdp->cpb, in_histlen, history_len);
+		history_len = history_len * 16; /* bytes */
+
+		if (history_len > 0) {
+			/* Chain in the history buffer to the DDE list */
+			if (cur_out >= history_len) {
+				nx_append_dde(ddl_in, fifo_out
+					      + (cur_out - history_len),
+					      history_len);
+			} else {
+				nx_append_dde(ddl_in, fifo_out
+					      + ((fifo_out_len + cur_out)
+					      - history_len),
+					      history_len - cur_out);
+				/* Up to 32KB history wraps around fifo_out */
+				nx_append_dde(ddl_in, fifo_out, cur_out);
+			}
+
+		}
+	} else {
+		/* First decompress job */
+		fc = GZIP_FC_DECOMPRESS;
+
+		history_len = 0;
+		/* Writing 0 clears out subc as well */
+		cmdp->cpb.in_histlen = 0;
+		total_out = 0;
+
+		put32(cmdp->cpb, in_crc, INIT_CRC);
+		put32(cmdp->cpb, in_adler, INIT_ADLER);
+		put32(cmdp->cpb, out_crc, INIT_CRC);
+		put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+		/* Assuming 10% compression ratio initially; use the
+		 * most recently measured compression ratio as a
+		 * heuristic to estimate the input and output
+		 * sizes.  If we give too much input, the target buffer
+		 * overflows and NX cycles are wasted, and then we
+		 * must retry with smaller input size.  1000 is 100%.
+		 */
+		last_comp_ratio = 100UL;
+	}
+	cmdp->crb.gzip_fc = 0;
+	putnn(cmdp->crb, gzip_fc, fc);
+
+	/*
+	 * NX source buffers
+	 */
+	first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
+	last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
+
+	if (first_used > 0)
+		nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
+
+	if (last_used > 0)
+		nx_append_dde(ddl_in, fifo_in, last_used);
+
+	/*
+	 * NX target buffers
+	 */
+	first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
+	last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
+
+	/* Reduce output free space amount not to overwrite the history */
+	target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
+				- (1<<16));
+
+	NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
+		      target_max));
+
+	first_free = NX_MIN(target_max, first_free);
+	if (first_free > 0) {
+		first_offset = fifo_free_first_offset(cur_out, used_out);
+		nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
+	}
+
+	if (last_free > 0) {
+		last_free = NX_MIN(target_max - first_free, last_free);
+		if (last_free > 0) {
+			last_offset = fifo_free_last_offset(cur_out, used_out,
+							    fifo_out_len);
+			nx_append_dde(ddl_out, fifo_out + last_offset,
+				      last_free);
+		}
+	}
+
+	/* Target buffer size is used to limit the source data size
+	 * based on previous measurements of compression ratio.
+	 */
+
+	/* source_sz includes history */
+	source_sz = getp32(ddl_in, ddebc);
+	assert(source_sz > history_len);
+	source_sz = source_sz - history_len;
+
+	/* Estimating how much source is needed to 3/4 fill a
+	 * target_max size target buffer.  If we overshoot, then NX
+	 * must repeat the job with smaller input and we waste
+	 * bandwidth.  If we undershoot then we use more NX calls than
+	 * necessary.
+	 */
+
+	source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
+				/ 4000;
+
+	if (source_sz_estimate < source_sz) {
+		/* Target might be small, therefore limiting the
+		 * source data.
+		 */
+		source_sz = source_sz_estimate;
+		target_sz_estimate = target_max;
+	} else {
+		/* Source file might be small, therefore limiting target
+		 * touch pages to a smaller value to save processor cycles.
+		 */
+		target_sz_estimate = ((uint64_t)source_sz * 1000UL)
+					/ (last_comp_ratio + 1);
+		target_sz_estimate = NX_MIN(2 * target_sz_estimate,
+					    target_max);
+	}
+
+	source_sz = source_sz + history_len;
+
+	/* Some NX condition codes require submitting the NX job again.
+	 * Kernel doesn't handle NX page faults. Expects user code to
+	 * touch pages.
+	 */
+	pgfault_retries = NX_MAX_FAULTS;
+
+restart_nx:
+
+	putp32(ddl_in, ddebc, source_sz);
+
+	/* Fault in pages */
+	nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
+	nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
+	nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
+
+	/* Send job to NX */
+	cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
+
+	switch (cc) {
+
+	case ERR_NX_AT_FAULT:
+
+		/* We touched the pages ahead of time.  In the most common case
+		 * we shouldn't be here.  But may be some pages were paged out.
+		 * Kernel should have placed the faulting address to fsaddr.
+		 */
+		NXPRT(fprintf(stderr, "ERR_NX_AT_FAULT %p\n",
+			      (void *)cmdp->crb.csb.fsaddr));
+
+		if (pgfault_retries == NX_MAX_FAULTS) {
+			/* Try once with exact number of pages */
+			--pgfault_retries;
+			goto restart_nx;
+		} else if (pgfault_retries > 0) {
+			/* If still faulting try fewer input pages
+			 * assuming memory outage
+			 */
+			if (source_sz > page_sz)
+				source_sz = NX_MAX(source_sz / 2, page_sz);
+			--pgfault_retries;
+			goto restart_nx;
+		} else {
+			fprintf(stderr, "cannot make progress; too many ");
+			fprintf(stderr, "page fault retries cc= %d\n", cc);
+			rc = -1;
+			goto err5;
+		}
+
+	case ERR_NX_DATA_LENGTH:
+
+		NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
+		NXPRT(fprintf(stderr, "stream may have trailing data\n"));
+
+		/* Not an error in the most common case; it just says
+		 * there is trailing data that we must examine.
+		 *
+		 * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
+		 * Fig.6-7 and Table 6-8.
+		 */
+		nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
+
+		if (!csb_ce_termination(nx_ce) &&
+		    csb_ce_partial_completion(nx_ce)) {
+			/* Check CPB for more information
+			 * spbc and tpbc are valid
+			 */
+			sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
+			subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
+			spbc = get32(cmdp->cpb, out_spbc_decomp);
+			tpbc = get32(cmdp->crb.csb, tpbc);
+			assert(target_max >= tpbc);
+
+			goto ok_cc3; /* not an error */
+		} else {
+			/* History length error when CE(1)=1 CE(0)=0. */
+			rc = -1;
+			fprintf(stderr, "history length error cc= %d\n", cc);
+			goto err5;
+		}
+
+	case ERR_NX_TARGET_SPACE:
+
+		/* Target buffer not large enough; retry smaller input
+		 * data; give at least 1 byte.  SPBC/TPBC are not valid.
+		 */
+		assert(source_sz > history_len);
+		source_sz = ((source_sz - history_len + 2) / 2) + history_len;
+		NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
+		NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
+			      source_sz, history_len));
+		goto restart_nx;
+
+	case ERR_NX_OK:
+
+		/* This should not happen for gzip formatted data;
+		 * we need trailing crc and isize
+		 */
+		fprintf(stderr, "ERR_NX_OK\n");
+		spbc = get32(cmdp->cpb, out_spbc_decomp);
+		tpbc = get32(cmdp->crb.csb, tpbc);
+		assert(target_max >= tpbc);
+		assert(spbc >= history_len);
+		source_sz = spbc - history_len;
+		goto offsets_state;
+
+	default:
+		fprintf(stderr, "error: cc= %d\n", cc);
+		rc = -1;
+		goto err5;
+	}
+
+ok_cc3:
+
+	NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
+
+	assert(spbc > history_len);
+	source_sz = spbc - history_len;
+
+	/* Table 6-4: Source Final Block Type (SFBT) describes the
+	 * last processed deflate block and clues the software how to
+	 * resume the next job.  SUBC indicates how many input bits NX
+	 * consumed but did not process.  SPBC indicates how many
+	 * bytes of source were given to the accelerator including
+	 * history bytes.
+	 */
+
+	switch (sfbt) {
+		int dhtlen;
+
+	case 0x0: /* Deflate final EOB received */
+
+		/* Calculating the checksum start position. */
+
+		source_sz = source_sz - subc / 8;
+		is_final = 1;
+		break;
+
+		/* Resume decompression cases are below. Basically
+		 * indicates where NX has suspended and how to resume
+		 * the input stream.
+		 */
+
+	case 0x8: /* Within a literal block; use rembytecount */
+	case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
+
+		/* Supply the partially processed source byte again */
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* SUBC LS 3bits: number of bits in the first source byte need
+		 * to be processed.
+		 * 000 means all 8 bits;  Table 6-3
+		 * Clear subc, histlen, sfbt, rembytecnt, dhtlen
+		 */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+		putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
+						      out_rembytecnt));
+		break;
+
+	case 0xA: /* Within a FH block; */
+	case 0xB: /* Within a FH block; bfinal=1 */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+		break;
+
+	case 0xC: /* Within a DH block; */
+	case 0xD: /* Within a DH block; bfinal=1 */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+
+		dhtlen = getnn(cmdp->cpb, out_dhtlen);
+		putnn(cmdp->cpb, in_dhtlen, dhtlen);
+		assert(dhtlen >= 42);
+
+		/* Round up to a qword */
+		dhtlen = (dhtlen + 127) / 128;
+
+		while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
+			--dhtlen;
+			cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
+		}
+		break;
+
+	case 0xE: /* Within a block header; bfinal=0; */
+		     /* Also given if source data exactly ends (SUBC=0) with
+		      * EOB code with BFINAL=0.  Means the next byte will
+		      * contain a block header.
+		      */
+	case 0xF: /* within a block header with BFINAL=1. */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+
+		/* Engine did not process any data */
+		if (is_eof && (source_sz == 0))
+			is_final = 1;
+	}
+
+offsets_state:
+
+	/* Adjust the source and target buffer offsets and lengths  */
+
+	NXPRT(fprintf(stderr, "offsets_state:\n"));
+
+	/* Delete input data from fifo_in */
+	used_in = used_in - source_sz;
+	cur_in = (cur_in + source_sz) % fifo_in_len;
+	input_file_offset = input_file_offset + source_sz;
+
+	/* Add output data to fifo_out */
+	used_out = used_out + tpbc;
+
+	assert(used_out <= fifo_out_len);
+
+	total_out = total_out + tpbc;
+
+	/* Deflate history is 32KB max.  No need to supply more
+	 * than 32KB on a resume.
+	 */
+	history_len = (total_out > window_max) ? window_max : total_out;
+
+	/* To estimate expected expansion in the next NX job; 500 means 50%.
+	 * Deflate best case is around 1 to 1000.
+	 */
+	last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
+			  / ((uint64_t)tpbc + 1);
+	last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
+	/* The ratio is at most 1000 here, so the cast to long is exact */
+	NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
+		      (long)last_comp_ratio, source_sz, spbc, tpbc));
+
+	resuming = 1;
+
+finish_state:
+
+	NXPRT(fprintf(stderr, "finish_state:\n"));
+
+	if (is_final) {
+		if (used_out)
+			goto write_state; /* More data to write out */
+		else if (used_in < 8) {
+			/* Need at least 8 more bytes containing gzip crc
+			 * and isize.
+			 */
+			rc = -1;
+			goto err4;
+		} else {
+			/* Compare checksums and exit */
+			unsigned char tail[8];
+			uint32_t cksum, isize;
+
+			for (i = 0; i < 8; i++)
+				tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
+			fprintf(stderr, "computed checksum %08x isize %08x\n",
+				cmdp->cpb.out_crc, (uint32_t) (total_out
+				% (1ULL<<32)));
+			cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
+				 | (uint32_t) tail[2]<<16
+				 | (uint32_t) tail[3]<<24);
+			isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
+				 | (uint32_t) tail[6]<<16
+				 | (uint32_t) tail[7]<<24);
+			fprintf(stderr, "stored   checksum %08x isize %08x\n",
+				cksum, isize);
+
+			if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
+			    (total_out % (1ULL<<32))) {
+				rc = 0;	goto ok1;
+			} else {
+				rc = -1; goto err4;
+			}
+		}
+	} else
+		goto read_state;
+
+	/* Not reached: every branch above jumps to a label */
+
+err1:
+	fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
+		expect, c);
+	rc = -1;
+	goto out;
+
+err2:
+	fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
+	rc = -1;
+	goto out;
+
+err3:
+	fprintf(stderr, "error: gzip header\n");
+	rc = -1;
+	goto out;
+
+err4:
+	fprintf(stderr, "error: checksum missing or mismatch\n");
+
+err5:
+ok1:
+	fprintf(stderr, "decomp is complete: fclose\n");
+
+out:
+	/* Common exit: release the buffers and close both streams */
+	free(fifo_out_base);
+	free(fifo_in);
+	fclose(inpf);
+
+	/* Buffered output is flushed here, so a failure means lost data */
+	if (fclose(outf) != 0) {
+		perror("fclose");
+		rc = -1;
+	}
+
+	return rc;
+}
+
+
+/*
+ * Program entry point: install nxu_sigsegv_handler for SIGSEGV, open the
+ * NX gzip function, decompress according to the command line and return
+ * the result of decompress_file().  Exits with -1 when NX cannot be
+ * initialised.
+ */
+int main(int argc, char **argv)
+{
+	struct sigaction act;
+	void *handle;
+	int rc;
+
+	nx_gzip_log = NULL;
+	nx_dbg = 0;
+
+	/* sa_handler is cleared before the SA_SIGINFO style handler is set */
+	act.sa_handler = 0;
+	act.sa_sigaction = nxu_sigsegv_handler;
+	act.sa_restorer = 0;
+	act.sa_flags = SA_SIGINFO;
+	sigemptyset(&act.sa_mask);
+	sigaction(SIGSEGV, &act, NULL);
+
+	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+	if (handle == NULL) {
+		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+		exit(-1);
+	}
+
+	rc = decompress_file(argc, argv, handle);
+	nx_function_end(handle);
+
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
new file mode 100644
index 0000000..02dffb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
+ * Not intended for production use or for performance or compression
+ * ratio measurements.  For simplicity of demonstration, this sample
+ * code compresses into fixed Huffman blocks only (Deflate btype=1)
+ * and has very simple memory management.  Dynamic Huffman blocks
+ * (Deflate btype=2) are more involved as detailed in the user guide.
+ * Note also that /dev/crypto/nx-gzip, VAS and skiboot support are
+ * required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE	// For aligned_alloc()
+#define _DEFAULT_SOURCE	// For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define FNAME_MAX 1024
+#define FEXT ".nx.gz"
+
+/* Fills in *cmdp for one fixed-Huffman job on src/dst and submits it.  With
+ * with_count set, LZ counts are returned in the user supplied
+ * nx_gzip_crb_cpb_t structure.  Returns what nxu_submit_job() returns. */
+static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
+				uint32_t dstlen, int with_count,
+				struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	uint32_t fc;
+
+	assert(!!cmdp);
+
+	put32(cmdp->crb, gzip_fc, 0);  /* clear */
+	fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
+			    GZIP_FC_COMPRESS_RESUME_FHT;
+	putnn(cmdp->crb, gzip_fc, fc);
+	putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
+	memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+	/* Section 6.6 programming notes; spbc may be in two different
+	 * places depending on FC.  Zero the one this FC will report in.
+	 */
+	if (!with_count)
+		put32(cmdp->cpb, out_spbc_comp, 0);
+	else
+		put32(cmdp->cpb, out_spbc_comp_with_count, 0);
+
+	/* Figure 6-3 6-4; CSB location: the csb embedded in this crb */
+	put64(cmdp->crb, csb_address, 0);
+	put64(cmdp->crb, csb_address,
+	      (uint64_t) &cmdp->crb.csb & csb_address_mask);
+
+	/* Source: one direct dde (dde_count 0) covering srclen bytes at src */
+	clear_dde(cmdp->crb.source_dde);
+	putnn(cmdp->crb.source_dde, dde_count, 0);
+	put32(cmdp->crb.source_dde, ddebc, srclen);
+	put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
+
+	/* Target: one direct dde (dde_count 0) covering dstlen bytes at dst */
+	clear_dde(cmdp->crb.target_dde);
+	putnn(cmdp->crb.target_dde, dde_count, 0);
+	put32(cmdp->crb.target_dde, ddebc, dstlen);
+	put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
+
+	/* Submit the crb, the job descriptor, to the accelerator */
+	return nxu_submit_job(cmdp, handle);
+}
+
+/*
+ * Writes a blank gzip member header (no file name, no timestamp, no
+ * optional fields) to buf and returns the number of bytes written.
+ * Gzip specification at https://tools.ietf.org/html/rfc1952
+ */
+int gzip_header_blank(char *buf)
+{
+	static const unsigned char blank_hdr[] = {
+		0x1f, 0x8b,		/* ID1, ID2 */
+		0x08,			/* CM  */
+		0x00,			/* FLG */
+		0x00, 0x00, 0x00, 0x00,	/* MTIME */
+		0x04,			/* XFL 4=fastest */
+		0x03,			/* OS UNIX */
+	};
+	int n;
+
+	n = sizeof(blank_hdr);
+	memcpy(buf, blank_hdr, n);
+
+	return n;
+}
+
+/* Reads fname into a malloc'ed *buf the caller frees; nonzero on error. */
+int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
+{
+	struct stat statbuf;
+	FILE *fp;
+	char *p;
+	size_t len;
+
+	if (stat(fname, &statbuf) || (fp = fopen(fname, "r")) == NULL) {
+		perror(fname);
+		return(-1);
+	}
+	len = statbuf.st_size;
+	/* malloc(0) may return NULL; never treat an empty file as no memory */
+	p = malloc(len ? len : 1);
+	if (p == NULL || fread(p, 1, len, fp) != len) {
+		/* Report first, then release what was acquired so far */
+		perror(fname);
+		free(p);
+		fclose(fp);
+		return(-1);
+	}
+	fclose(fp);
+	*buf = p;
+	*bufsize = len;
+	return 0;
+}
+
+/* Writes bufsize bytes of buf to fname, truncating it; nonzero on error */
+int write_output_file(char *fname, char *buf, size_t bufsize)
+{
+	FILE *fp;
+	int rc = 0;
+
+	fp = fopen(fname, "w");
+	if (fp == NULL) {
+		perror(fname);
+		return(-1);
+	}
+	if (fwrite(buf, 1, bufsize, fp) != bufsize) {
+		perror(fname);
+		rc = -1;
+	}
+	if (fclose(fp))	/* closes on every path; a failed flush is an error */
+		rc = -1;
+	return rc;
+}
+
+/*
+ * Z_SYNC_FLUSH as described in zlib.h: appends a zero length block at buf,
+ * where tebc is the count of valid bits in buf[-1].  Returns number of appended bytes
+ */
+int append_sync_flush(char *buf, int tebc, int final)
+{
+	uint64_t flush;
+	int shift = (tebc & 0x7);
+
+	if (tebc > 0) {
+		/* Last byte is partially full; keep only its tebc low bits */
+		buf = buf - 1;
+		*buf = *buf & (unsigned char) ((1<<tebc)-1);
+	} else
+		*buf = 0;
+	flush = ((0x1ULL & final) << shift) | *buf;
+	shift = shift + 3; /* BFINAL and BTYPE written */
+	shift = (shift <= 8) ? 8 : 16;	/* round up to a byte boundary */
+	flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
+	shift = shift + 32;	/* total number of bits to emit */
+	while (shift > 0) {	/* emit the low byte first */
+		*buf++ = (unsigned char) (flush & 0xffULL);
+		flush = flush >> 8;
+		shift = shift - 8;
+	}
+	return(((tebc > 5) || (tebc == 0)) ? 5 : 4); /* bytes past old buf */
+}
+
+/*
+ * Sets or clears BFINAL, the lowest bit of the first byte at buf.  This
+ * call assumes the deflate block beginning is byte aligned.
+ */
+static void set_bfinal(void *buf, int bfinal)
+{
+	char *first = buf;
+	int byte = *first;
+
+	/* Only bit 0 changes; the other seven bits are preserved */
+	byte = bfinal ? (byte | 0x01) : (byte & 0xfe);
+	*first = byte;
+}
+
+/* Gzips argv[1] into argv[1] + FEXT, one chunk per NX job; 0 or exit(-1). */
+int compress_file(int argc, char **argv, void *handle)
+{
+	char *inbuf, *outbuf, *srcbuf, *dstbuf;
+	char outname[FNAME_MAX];
+	uint32_t srclen, dstlen;
+	uint32_t flushlen, chunk;
+	size_t inlen, outlen, dsttotlen, srctotlen;
+	uint32_t crc = 0, spbc, tpbc, tebc; /* crc stays 0 for empty input */
+	int lzcounts = 0;
+	int cc, num_hdr_bytes;
+	struct nx_gzip_crb_cpb_t *cmdp;
+	uint32_t pagelen = 65536;
+	int fault_tries = NX_MAX_FAULTS;
+
+	cmdp = (void *)(uintptr_t)
+		aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
+			      sizeof(struct nx_gzip_crb_cpb_t));
+	assert(cmdp != NULL);
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s <fname>\n", argv[0]);
+		exit(-1);
+	}
+	if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+		exit(-1);
+	fprintf(stderr, "file %s read, %zu bytes\n", argv[1], inlen);
+
+	/* Generous output buffer for header/trailer */
+	outlen = 2 * inlen + 1024;
+
+	outbuf = malloc(outlen);	/* kept out of assert(): NDEBUG-safe */
+	assert(outbuf != NULL);
+	nxu_touch_pages(outbuf, outlen, pagelen, 1);
+
+	/* Compress piecemeal in smallish chunks */
+	chunk = 1<<22;
+
+	/* Write the gzip header to the stream */
+	num_hdr_bytes = gzip_header_blank(outbuf);
+	dstbuf    = outbuf + num_hdr_bytes;
+	outlen    = outlen - num_hdr_bytes;
+	dsttotlen = num_hdr_bytes;
+
+	srcbuf    = inbuf;
+	srctotlen = 0;
+
+	/* Init the CRB, the coprocessor request block */
+	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+	/* Initial gzip crc32 */
+	put32(cmdp->cpb, in_crc, 0);
+
+	while (inlen > 0) {
+
+		/* Submit chunk size source data per job */
+		srclen = NX_MIN(chunk, inlen);
+		/* Supply large target in case data expands */
+		dstlen = NX_MIN(2*srclen, outlen);
+
+		/* Page faults are handled by the user code */
+
+		/* Fault-in pages; an improved code wouldn't touch so
+		 * many pages but would try to estimate the
+		 * compression ratio and adjust both the src and dst
+		 * touch amounts.
+		 */
+		nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
+				1);
+		nxu_touch_pages(srcbuf, srclen, pagelen, 0);
+		nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
+
+		cc = compress_fht_sample(
+			srcbuf, srclen,
+			dstbuf, dstlen,
+			lzcounts, cmdp, handle);
+
+		if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
+		    cc != ERR_NX_AT_FAULT) {
+			fprintf(stderr, "nx error: cc= %d\n", cc);
+			exit(-1);
+		}
+
+		/* Page faults are handled by the user code */
+		if (cc == ERR_NX_AT_FAULT) {
+			NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
+			NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
+				  fault_tries,
+				  (unsigned long long) cmdp->crb.csb.fsaddr));
+			fault_tries--;
+			if (fault_tries > 0) {
+				continue;
+			} else {
+				fprintf(stderr, "error: cannot progress; ");
+				fprintf(stderr, "too many faults\n");
+				exit(-1);
+			}
+		}
+
+		fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
+
+		inlen     = inlen - srclen;
+		srcbuf    = srcbuf + srclen;
+		srctotlen = srctotlen + srclen;
+
+		/* Two possible locations for spbc depending on the function
+		 * code.
+		 */
+		spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
+			get32(cmdp->cpb, out_spbc_comp_with_count);
+		assert(spbc == srclen);
+
+		/* Target byte count */
+		tpbc = get32(cmdp->crb.csb, tpbc);
+		/* Target ending bit count */
+		tebc = getnn(cmdp->cpb, out_tebc);
+		NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
+		NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
+
+		if (inlen > 0) { /* More chunks to go */
+			set_bfinal(dstbuf, 0);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+			/* Round up to the next byte with a flush
+			 * block; do not set the BFINAL bit.
+			 */
+			flushlen  = append_sync_flush(dstbuf, tebc, 0);
+			dsttotlen = dsttotlen + flushlen;
+			outlen    = outlen - flushlen;
+			dstbuf    = dstbuf + flushlen;
+			NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
+					flushlen));
+		} else {  /* Done */
+			/* Set the BFINAL bit of the last block per Deflate
+			 * specification.
+			 */
+			set_bfinal(dstbuf, 1);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+		}
+
+		/* Resuming crc32 for the next chunk */
+		crc = get32(cmdp->cpb, out_crc);
+		put32(cmdp->cpb, in_crc, crc);
+		crc = be32toh(crc);
+	}
+
+	/* Append crc32 and ISIZE to the end */
+	memcpy(dstbuf, &crc, 4);
+	memcpy(dstbuf+4, &srctotlen, 4);
+	dsttotlen = dsttotlen + 8;
+	outlen    = outlen - 8;
+
+	assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
+	strcpy(outname, argv[1]);
+	strcat(outname, FEXT);
+	if (write_output_file(outname, outbuf, dsttotlen)) {
+		fprintf(stderr, "write error: %s\n", outname);
+		exit(-1);
+	}
+
+	fprintf(stderr, "compressed %zu to %zu bytes total, ", srctotlen,
+		dsttotlen);
+	fprintf(stderr, "crc32 checksum = %08x\n", crc);
+
+	free(inbuf);
+	free(outbuf);
+	free(cmdp);
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int rc;
+	struct sigaction act;
+	void *handle;
+
+	nx_dbg = 0;
+	nx_gzip_log = NULL;
+	act.sa_handler = 0;
+	act.sa_sigaction = nxu_sigsegv_handler;	/* 3-argument SA_SIGINFO form */
+	act.sa_flags = SA_SIGINFO;
+	act.sa_restorer = 0;
+	sigemptyset(&act.sa_mask);
+	sigaction(SIGSEGV, &act, NULL);	/* return value is not checked */
+
+	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+	if (!handle) {
+		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+		exit(-1);
+	}
+
+	rc = compress_file(argc, argv, handle);	/* exits itself on errors */
+
+	nx_function_end(handle);
+
+	return rc;	/* compress_file() result is the exit status */
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
new file mode 100644
index 0000000..c055885
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "vas-api.h"
+#include "nx.h"
+#include "copy-paste.h"
+#include "nxu.h"
+#include "nx_dbg.h"
+#include <sys/platform/ppc.h>
+
+#define barrier()
+#define hwsync()    ({ asm volatile("sync" ::: "memory"); })
+
+#ifndef NX_NO_CPU_PRI
+#define cpu_pri_default()  ({ asm volatile ("or 2, 2, 2"); })
+#define cpu_pri_low()      ({ asm volatile ("or 31, 31, 31"); })
+#else
+#define cpu_pri_default()
+#define cpu_pri_low()
+#endif
+
+void *nx_fault_storage_address;
+
+struct nx_handle {
+	int fd;			/* device descriptor from open_device_nodes() */
+	int function;		/* NX_FUNC_* passed to nx_function_begin() */
+	void *paste_addr;	/* mmap() base + 0x400; vas_paste() target */
+};
+
+static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
+{
+	int rc, fd;
+	void *addr;
+	struct vas_tx_win_open_attr txattr;
+
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, " open device name %s\n", devname);
+		return -errno;
+	}
+
+	memset(&txattr, 0, sizeof(txattr));
+	txattr.version = 1;
+	txattr.vas_id = pri;	/* caller's pri is passed as the vas_id */
+	rc = ioctl(fd, VAS_TX_WIN_OPEN, (unsigned long)&txattr);
+	if (rc < 0) {
+		fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
+		rc = -errno;
+		goto out;
+	}
+
+	addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
+	if (addr == MAP_FAILED) {
+		fprintf(stderr, "mmap() failed, errno %d\n", errno);
+		rc = -errno;
+		goto out;
+	}
+	handle->fd = fd;	/* stays open; nx_function_end() closes it */
+	handle->paste_addr = (void *)((char *)addr + 0x400);
+
+	return 0;
+out:
+	close(fd);	/* error paths only: fd was not handed to the handle */
+	return rc;
+}
+
+void *nx_function_begin(int function, int pri)
+{
+	int rc;
+	char *devname = "/dev/crypto/nx-gzip";
+	struct nx_handle *nxhandle;
+
+	if (function != NX_FUNC_COMP_GZIP) {
+		errno = EINVAL;
+		fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
+		return NULL;
+	}
+
+	nxhandle = malloc(sizeof(*nxhandle));
+	if (!nxhandle) {
+		errno = ENOMEM;
+		fprintf(stderr, " No memory\n");
+		return NULL;
+	}
+
+	nxhandle->function = function;
+	rc = open_device_nodes(devname, pri, nxhandle);
+	if (rc < 0) {
+		fprintf(stderr, " open_device_nodes failed\n");
+		free(nxhandle);	/* not returned to the caller, so release it */
+		errno = -rc;	/* set last so nothing above can overwrite it */
+		return NULL;
+	}
+
+	return nxhandle;
+}
+
+int nx_function_end(void *handle)
+{
+	int rc = 0;
+	struct nx_handle *nxhandle = handle;
+
+	/* paste_addr is the mmap() base + 0x400; undo the offset in bytes */
+	rc = munmap((char *)nxhandle->paste_addr - 0x400, 4096);
+	if (rc < 0)
+		fprintf(stderr, "munmap() failed, errno %d\n", errno);
+	/* Release the descriptor and the handle even if munmap() failed */
+	close(nxhandle->fd);
+	free(nxhandle);
+
+	return rc;
+}
+
+static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
+{
+	long poll = 0;	/* polling iterations so far */
+	uint64_t t;	/* timebase sampled before the first poll */
+
+	/* Save power and let other threads use the h/w. top may show
+	 * 100% but only because OS doesn't know we slowed this
+	 * h/w thread while polling. We're letting other threads have
+	 * higher throughput on the core.
+	 */
+	cpu_pri_low();
+
+#define CSB_MAX_POLL 200000000UL
+#define USLEEP_TH     300000UL
+
+	t = __ppc_get_timebase();
+
+	while (getnn(cmdp->crb.csb, csb_v) == 0) {
+		++poll;
+		hwsync();
+
+		cpu_pri_low();
+
+		/* usleep(0) takes around 29000 ticks ~60 us.
+		 * 300000 is spinning for about 600 us then
+		 * start sleeping.
+		 */
+		if ((__ppc_get_timebase() - t) > USLEEP_TH) {
+			cpu_pri_default();
+			usleep(1);
+		}
+
+		if (poll > CSB_MAX_POLL)
+			break;	/* reported as -ETIMEDOUT below */
+
+		/* Fault address from signal handler */
+		if (nx_fault_storage_address) {
+			cpu_pri_default();
+			return -EAGAIN;	/* nxu_run_job() touches it and retries */
+		}
+
+	}
+
+	cpu_pri_default();
+
+	/* hw has updated csb and output buffer */
+	hwsync();
+
+	/* Check CSB flags. */
+	if (getnn(cmdp->crb.csb, csb_v) == 0) {
+		fprintf(stderr, "CSB still not valid after %d polls.\n",
+			(int) poll);
+		prt_err("CSB still not valid after %d polls, giving up.\n",
+			(int) poll);
+		return -ETIMEDOUT;
+	}
+
+	return 0;	/* csb_v is set: the CSB can be read */
+}
+
+static int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int i, ret, retries;
+	struct nx_handle *nxhandle = handle;
+
+	assert(handle != NULL);
+	i = 0;
+	retries = 5000;	/* upper bound on copy/paste submissions */
+	while (i++ < retries) {
+		hwsync();
+		vas_copy(&cmdp->crb, 0);
+		ret = vas_paste(nxhandle->paste_addr, 0);
+		hwsync();
+
+		NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
+				i, retries, ret));
+
+		if ((ret == 2) || (ret == 3)) {	/* paste was accepted */
+
+			ret = nx_wait_for_csb(cmdp);
+			if (!ret) {
+				goto out;
+			} else if (ret == -EAGAIN) {
+				long x;
+
+				prt_err("Touching address %p, 0x%lx\n",
+					 nx_fault_storage_address,
+					 *(long *) nx_fault_storage_address);
+				x = *(long *) nx_fault_storage_address;
+				*(long *) nx_fault_storage_address = x;
+				nx_fault_storage_address = 0;
+				continue;
+			} else {
+				prt_err("wait_for_csb() returns %d\n", ret);
+				break;
+			}
+		} else {
+			if (i < 10) {
+				/* spin for few ticks */
+#define SPIN_TH 500UL
+				uint64_t fail_spin;
+
+				fail_spin = __ppc_get_timebase();
+				while ((__ppc_get_timebase() - fail_spin) <
+					 SPIN_TH)
+					;
+			} else {
+				/* Sleep between attempts and log only
+				 * every 100th failed paste.
+				 */
+				if (i % 100 == 0) {
+					prt_err("Paste attempt %d/", i);
+					prt_err("%d, failed pid= %d\n", retries,
+						getpid());
+				}
+				usleep(1);
+			}
+			ret = -EBUSY;	/* kept if retries run out */
+		}
+	}
+
+out:
+	cpu_pri_default();
+
+	return ret;
+}
+
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int rc = nxu_run_job(cmdp, handle);
+
+	/* A failed submission is returned as is; otherwise report the
+	 * completion code found in the CSB (CC Table 6-8).
+	 */
+	if (rc != 0)
+		return rc;
+	return getnn(cmdp->crb.csb, csb_cc);
+}
+
+/* SIGSEGV hook: log the fault, save its address for nx_wait_for_csb(). */
+void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+	fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+		sig, info->si_code, info->si_addr);
+
+	nx_fault_storage_address = info->si_addr;
+}
+
+/*
+ * Fault in pages prior to NX job submission by touching one byte every
+ * page_len bytes and the last byte; wr=1 also writes each byte back, since
+ * system zero pages do not fault-in the page as intended.  Typically wr=1 for
+ * NX target pages, wr=0 for NX source pages.  Returns 0, or -1 on bad input.
+ */
+int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+	char *begin = buf;
+	char *end;
+	volatile char t;
+
+	assert(buf_len >= 0 && !!buf);
+
+	NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+			(void *)((char *) buf + buf_len), buf_len, wr));
+
+	if (buf_len <= 0 || buf == NULL || page_len <= 0)
+		return -1;	/* a non-positive stride would never terminate */
+
+	end = begin + buf_len - 1;	/* last byte of the buffer */
+	do {
+		t = *begin;
+		if (wr)
+			*begin = t;
+		begin = begin + page_len;
+	} while (begin < end);
+	/* When buf_sz is small or buf tail is in another page */
+	t = *end;
+	if (wr)
+		*end = t;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
new file mode 100644
index 0000000..0db2d64
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* From asm-compat.h */
+#define __stringify_in_c(...)	#__VA_ARGS__
+#define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+
+/*
+ * Macros taken from arch/powerpc/include/asm/ppc-opcode.h and other
+ * header files.
+ */
+#define ___PPC_RA(a)    (((a) & 0x1f) << 16)
+#define ___PPC_RB(b)    (((b) & 0x1f) << 11)
+
+#define PPC_INST_COPY                   0x7c20060c
+#define PPC_INST_PASTE                  0x7c20070d
+
+#define PPC_COPY(a, b)          stringify_in_c(.long PPC_INST_COPY | \
+						___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_PASTE(a, b)         stringify_in_c(.long PPC_INST_PASTE | \
+						___PPC_RA(a) | ___PPC_RB(b))
+#define CR0_SHIFT	28
+#define CR0_MASK	0xF
+/*
+ * Copy/paste instructions:
+ *
+ *	copy RA,RB
+ *		Copy contents of address (RA) + effective_address(RB)
+ *		to internal copy-buffer.
+ *
+ *	paste RA,RB
+ *		Paste contents of internal copy-buffer to the address
+ *		(RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+	asm volatile(PPC_COPY(%0, %1)";"
+		:
+		: "b" (offset), "b" (crb)	/* %0 -> RA field, %1 -> RB field */
+		: "memory");
+
+	return 0;	/* always 0; no status is read back here */
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+	__u32 cr;
+
+	cr = 0;
+	asm volatile(PPC_PASTE(%1, %2)";"
+		"mfocrf %0, 0x80;"
+		: "=r" (cr)	/* %0: value read back by mfocrf */
+		: "b" (offset), "b" (paste_address)	/* %1 -> RA, %2 -> RB */
+		: "memory", "cr0");
+
+	return (cr >> CR0_SHIFT) & CR0_MASK;	/* bits 31..28 of cr */
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/crb.h b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
new file mode 100644
index 0000000..ab10108
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __CRB_H
+#define __CRB_H
+#include <linux/types.h>
+#include "nx.h"
+
+/* CCW 842 CI/FC masks
+ * NX P8 workbook, section 4.3.1, figure 4-6
+ * "CI/FC Boundary by NX CT type"
+ */
+#define CCW_CI_842              (0x00003ff8)
+#define CCW_FC_842              (0x00000007)
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE		(0x3fffffffffffffff)
+#define CCB_ADDRESS		(0xfffffffffffffff8)
+#define CCB_CM			(0x0000000000000007)
+#define CCB_CM0			(0x0000000000000004)
+#define CCB_CM12		(0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS	(0x0)
+#define CCB_CM0_LAST_IN_CHAIN	(0x4)
+#define CCB_CM12_STORE		(0x0)
+#define CCB_CM12_INTERRUPT	(0x1)
+
+#define CCB_SIZE		(0x10)
+#define CCB_ALIGN		CCB_SIZE
+
+struct coprocessor_completion_block {
+	__be64 value;	/* big-endian; presumably masked by CCB_VALUE */
+	__be64 address;	/* big-endian; presumably CCB_ADDRESS plus CCB_CM bits */
+} __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V			(0x80)
+#define CSB_F			(0x04)
+#define CSB_CH			(0x03)
+#define CSB_CE_INCOMPLETE	(0x80)
+#define CSB_CE_TERMINATION	(0x40)
+#define CSB_CE_TPBC		(0x20)
+
+#define CSB_CC_SUCCESS		(0)
+#define CSB_CC_INVALID_ALIGN	(1)
+#define CSB_CC_OPERAND_OVERLAP	(2)
+#define CSB_CC_DATA_LENGTH	(3)
+#define CSB_CC_TRANSLATION	(5)
+#define CSB_CC_PROTECTION	(6)
+#define CSB_CC_RD_EXTERNAL	(7)
+#define CSB_CC_INVALID_OPERAND	(8)
+#define CSB_CC_PRIVILEGE	(9)
+#define CSB_CC_INTERNAL		(10)
+#define CSB_CC_WR_EXTERNAL	(12)
+#define CSB_CC_NOSPC		(13)
+#define CSB_CC_EXCESSIVE_DDE	(14)
+#define CSB_CC_WR_TRANSLATION	(15)
+#define CSB_CC_WR_PROTECTION	(16)
+#define CSB_CC_UNKNOWN_CODE	(17)
+#define CSB_CC_ABORT		(18)
+#define CSB_CC_TRANSPORT	(20)
+#define CSB_CC_SEGMENTED_DDL	(31)
+#define CSB_CC_PROGRESS_POINT	(32)
+#define CSB_CC_DDE_OVERFLOW	(33)
+#define CSB_CC_SESSION		(34)
+#define CSB_CC_PROVISION	(36)
+#define CSB_CC_CHAIN		(37)
+#define CSB_CC_SEQUENCE		(38)
+#define CSB_CC_HW		(39)
+
+#define CSB_SIZE		(0x10)
+#define CSB_ALIGN		CSB_SIZE
+
+struct coprocessor_status_block {
+	__u8 flags;	/* presumably the CSB_V, CSB_F and CSB_CH bits */
+	__u8 cs;
+	__u8 cc;	/* presumably one of the CSB_CC_* codes */
+	__u8 ce;	/* presumably the CSB_CE_* bits */
+	__be32 count;	/* big-endian */
+	__be64 address;	/* big-endian */
+} __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P			(0x8000)
+
+#define DDE_SIZE		(0x10)
+#define DDE_ALIGN		DDE_SIZE
+
+struct data_descriptor_entry {
+	__be16 flags;	/* big-endian; presumably carries DDE_P */
+	__u8 count;
+	__u8 index;
+	__be32 length;	/* big-endian */
+	__be64 address;	/* big-endian */
+} __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE		(0x80)
+#define CRB_ALIGN		(0x100) /* Errata: requires 256 alignment */
+
+
+/* Coprocessor Status Block field
+ *   ADDRESS	address of CSB
+ *   C		CCB is valid
+ *   AT		0 = addrs are virtual, 1 = addrs are phys
+ *   M		enable perf monitor
+ */
+#define CRB_CSB_ADDRESS		(0xfffffffffffffff0)
+#define CRB_CSB_C		(0x0000000000000008)
+#define CRB_CSB_AT		(0x0000000000000002)
+#define CRB_CSB_M		(0x0000000000000001)
+
+struct coprocessor_request_block {
+	__be32 ccw;	/* big-endian; see the CCW_* masks */
+	__be32 flags;
+	__be64 csb_addr;	/* read with crb_csb_addr(); see CRB_CSB_* masks */
+
+	struct data_descriptor_entry source;
+	struct data_descriptor_entry target;
+
+	struct coprocessor_completion_block ccb;
+
+	__u8 reserved[48];	/* csb then ends the struct at CRB_SIZE (0x80) */
+
+	struct coprocessor_status_block csb;
+} __aligned(CRB_ALIGN);
+
+#define crb_csb_addr(c)         __be64_to_cpu((c)->csb_addr)
+#define crb_nx_fault_addr(c)    __be64_to_cpu((c)->stamp.nx.fault_storage_addr)
+#define crb_nx_flags(c)         ((c)->stamp.nx.flags)
+#define crb_nx_fault_status(c)  ((c)->stamp.nx.fault_status)
+#define crb_nx_pswid(c)		((c)->stamp.nx.pswid)
+/* NOTE(review): crb_nx_*() need a "stamp" member the struct above lacks. */
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS                  (0xff000000)
+#define CCW_CT                  (0x00ff0000)
+#define CCW_CD                  (0x0000ffff)
+#define CCW_CL                  (0x0000c000)
+
+#endif
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
new file mode 100644
index 0000000..1abe23f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ */
+#ifndef _NX_H
+#define _NX_H
+
+#include <stdbool.h>
+
+#define	NX_FUNC_COMP_842	1
+#define NX_FUNC_COMP_GZIP	2
+
+#ifndef __aligned
+#define __aligned(x)	__attribute__((aligned(x)))
+#endif
+
+struct nx842_func_args {
+	bool use_crc;			/* NOTE(review): meaning not shown here */
+	bool decompress;		/* true decompress; false compress */
+	bool move_data;			/* NOTE(review): meaning not shown here */
+	int timeout;			/* seconds */
+};
+
+struct nxbuf_t {
+	int len;	/* presumably the length of buf in bytes - TODO confirm */
+	char *buf;
+};
+
+/* @function should be EFT (aka 842), GZIP etc */
+void *nx_function_begin(int function, int pri);
+
+int nx_function(void *handle, struct nxbuf_t *in, struct nxbuf_t *out,
+		void *arg);
+
+int nx_function_end(void *handle);
+
+#endif	/* _NX_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
new file mode 100644
index 0000000..16464e1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corporation
+ *
+ */
+
+#ifndef _NXU_DBG_H_
+#define _NXU_DBG_H_
+
+#include <sys/file.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <pthread.h>
+
+extern FILE * nx_gzip_log;	/* stream written by prt() and the trace macros */
+extern int nx_gzip_trace;	/* bit mask tested by the nx_gzip_*() macros */
+extern unsigned int nx_gzip_inflate_impl;
+extern unsigned int nx_gzip_deflate_impl;
+extern unsigned int nx_gzip_inflate_flags;
+extern unsigned int nx_gzip_deflate_flags;
+
+extern int nx_dbg;	/* threshold for prt_err/prt_warn/prt_info */
+pthread_mutex_t mutex_log; /* NOTE(review): defined (not extern) per includer */
+
+#define nx_gzip_trace_enabled()       (nx_gzip_trace & 0x1)
+#define nx_gzip_hw_trace_enabled()    (nx_gzip_trace & 0x2)
+#define nx_gzip_sw_trace_enabled()    (nx_gzip_trace & 0x4)
+#define nx_gzip_gather_statistics()   (nx_gzip_trace & 0x8)
+#define nx_gzip_per_stream_stat()     (nx_gzip_trace & 0x10)
+
+#define prt(fmt, ...) do { \
+	FILE *lf_ = nx_gzip_log ? nx_gzip_log : stderr; /* log may be NULL */ \
+	pthread_mutex_lock(&mutex_log);					\
+	flock(fileno(lf_), LOCK_EX);					\
+	time_t t; struct tm *m; time(&t); m = localtime(&t);		\
+	fprintf(lf_, "[%04d/%02d/%02d %02d:%02d:%02d] pid %d: " fmt,	\
+		(int)m->tm_year + 1900, (int)m->tm_mon+1, (int)m->tm_mday, \
+		(int)m->tm_hour, (int)m->tm_min, (int)m->tm_sec,	\
+		(int)getpid(), ## __VA_ARGS__);				\
+	fflush(lf_);							\
+	flock(fileno(lf_), LOCK_UN);					\
+	pthread_mutex_unlock(&mutex_log);				\
+} while (0)
+
+/* Use in case of an error */
+#define prt_err(fmt, ...) do { if (nx_dbg >= 0) {			\
+	prt("%s:%u: Error: "fmt,					\
+		__FILE__, __LINE__, ## __VA_ARGS__);			\
+}} while (0)
+
+/* Use in case of a warning */
+#define prt_warn(fmt, ...) do {	if (nx_dbg >= 1) {			\
+	prt("%s:%u: Warning: "fmt,					\
+		__FILE__, __LINE__, ## __VA_ARGS__);			\
+}} while (0)
+
+/* Informational printouts */
+#define prt_info(fmt, ...) do {	if (nx_dbg >= 2) {			\
+	prt("Info: "fmt, ## __VA_ARGS__);				\
+}} while (0)
+
+/* Trace zlib wrapper code */
+#define prt_trace(fmt, ...) do { if (nx_gzip_trace_enabled()) {		\
+	prt("### "fmt, ## __VA_ARGS__);					\
+}} while (0)
+
+/* Trace statistics */
+#define prt_stat(fmt, ...) do {	if (nx_gzip_gather_statistics()) {	\
+	prt("### "fmt, ## __VA_ARGS__);					\
+}} while (0)
+
+/* Trace zlib hardware implementation */
+#define hw_trace(fmt, ...) do {						\
+		if (nx_gzip_hw_trace_enabled())				\
+			fprintf(nx_gzip_log, "hhh " fmt, ## __VA_ARGS__); \
+	} while (0)
+
+/* Trace zlib software implementation */
+#define sw_trace(fmt, ...) do {						\
+		if (nx_gzip_sw_trace_enabled())				\
+			fprintf(nx_gzip_log, "sss " fmt, ## __VA_ARGS__); \
+	} while (0)
+
+
+/**
+ * str_to_num - Convert string into number and cope with endings like
+ *              KiB for kilobyte
+ *              MiB for megabyte
+ *              GiB for gigabyte
+ */
+uint64_t str_to_num(char *str);
+void nx_lib_debug(int onoff);
+
+#endif	/* _NXU_DBG_H_ */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
new file mode 100644
index 0000000..20a4e88
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
@@ -0,0 +1,650 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Hardware interface of the NX-GZIP compression accelerator
+ *
+ * Copyright (C) IBM Corporation, 2020
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+
+#ifndef _NXU_H
+#define _NXU_H
+
+#include <stdint.h>
+#include <endian.h>
+#include "nx.h"
+
+/* deflate */
+#define LLSZ   286
+#define DSZ    30
+
+/* nx */
+#define DHTSZ  18
+#define DHT_MAXSZ 288
+#define MAX_DDE_COUNT 256
+
+/* util */
+#ifdef NXDBG
+#define NXPRT(X)	X
+#else
+#define NXPRT(X)
+#endif
+
+#ifdef NXTIMER
+#include <sys/platform/ppc.h>
+#define NX_CLK(X)	X
+#define nx_get_time()	__ppc_get_timebase()
+#define nx_get_freq()	__ppc_get_timebase_freq()
+#else
+#define NX_CLK(X)
+#define nx_get_time()  (-1)
+#define nx_get_freq()  (-1)
+#endif
+
+#define NX_MAX_FAULTS  500
+
+/*
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+union nx_qw_t {
+	uint32_t word[4];
+	uint64_t dword[2];
+} __aligned(16);
+
+/*
+ * Note: NX registers with fewer than 32 bits are declared by
+ * convention as uint32_t variables in unions. If *_offset and *_mask
+ * are defined for a variable, then use get_ put_ macros to
+ * conveniently access the register fields for endian conversions.
+ */
+
+struct nx_dde_t {
+	/* Data Descriptor Element, Section 6.4 */
+	union {
+		uint32_t dde_count;
+		/* When dde_count == 0 ddead is a pointer to a data buffer;
+		 * ddebc is the buffer length bytes.
+		 * When dde_count > 0 dde is an indirect dde; ddead is a
+		 * pointer to a contiguous list of direct ddes; ddebc is the
+		 * total length of all data pointed to by the list of direct
+		 * ddes. Note that only one level of indirection is permitted.
+		 * See Section 6.4 of the user manual for additional details.
+		 */
+	};
+	uint32_t ddebc; /* dde byte count */
+	uint64_t ddead; /* dde address */
+} __aligned(16);
+
+struct nx_csb_t {
+	/* Coprocessor Status Block, Section 6.6  */
+	union {
+		uint32_t csb_v;
+		/* Valid bit. v must be set to 0 by the program
+		 * before submitting the coprocessor command.
+		 * Software can poll for the v bit
+		 */
+
+		uint32_t csb_f;
+		/* 16B CSB size. Written to 0 by DMA when it writes the CPB */
+
+		uint32_t csb_cs;
+		/* cs completion sequence; unused */
+
+		uint32_t csb_cc;
+		/* cc completion code; cc != 0 exception occurred */
+
+		uint32_t csb_ce;
+		/* ce completion extension */
+
+	};
+	uint32_t tpbc;
+	/* target processed byte count TPBC */
+
+	uint64_t fsaddr;
+	/* Section 6.12.1 CSB NonZero error summary.  FSA Failing storage
+	 * address.  Address where error occurred. When available, written
+	 * to A field of CSB
+	 */
+} __aligned(16);
+
+struct nx_ccb_t {
+	/* Coprocessor Completion Block, Section 6.7 */
+
+	uint32_t reserved[3];
+	union {
+		/* When crb.c==0 (no ccb defined) it is reserved;
+		 * When crb.c==1 (ccb defined) it is cm
+		 */
+
+		uint32_t ccb_cm;
+		/* Signal interrupt of crb.c==1 and cm==1 */
+
+		uint32_t word;
+		/* generic access to the 32bit word */
+	};
+} __aligned(16);
+
+struct vas_stamped_crb_t {
+	/*
+	 * CRB operand of the paste coprocessor instruction is stamped
+	 * in quadword 4 with the information shown here as it is written
+	 * into the receive FIFO of the coprocessor.
+	 */
+
+	union {
+		uint32_t vas_buf_num;
+		/* Verification only vas buffer number which correlates to
+		 * the low order bits of the atag in the paste command
+		 */
+
+		uint32_t send_wc_id;
+		/* Pointer to Send Window Context that provides for NX address
+		 * translation information, such as MSR and LPCR bits, job
+		 * completion interrupt RA, PSWID, and job utilization counter.
+		 */
+
+	};
+	union {
+		uint32_t recv_wc_id;
+		/* Pointer to Receive Window Context. NX uses this to return
+		 * credits to a Receive FIFO as entries are dequeued.
+		 */
+
+	};
+	uint32_t reserved2;
+	union {
+		uint32_t vas_invalid;
+		/* Invalid bit. If this bit is 1 the CRB is discarded by
+		 * NX upon fetching from the receive FIFO. If this bit is 0
+		 * the CRB is processed normally. The bit is stamped to 0
+		 * by VAS and may be written to 1 by hypervisor while
+		 * the CRB is in the receive FIFO (in memory).
+		 */
+
+	};
+};
+
+struct nx_stamped_fault_crb_t {
+	/*
+	 * A CRB that has a translation fault is stamped by NX in quadword 4
+	 * and pasted to the Fault Send Window in VAS.
+	 */
+	uint64_t fsa;
+	union {
+		uint32_t nxsf_t;
+		uint32_t nxsf_fs;
+	};
+	uint32_t pswid;
+};
+
+union stamped_crb_t {
+	struct vas_stamped_crb_t      vas;
+	struct nx_stamped_fault_crb_t nx;
+};
+
+struct nx_gzip_cpb_t {
+	/*
+	 * Coprocessor Parameter Block In/Out are used to pass metadata
+	 * to/from accelerator.  Tables 6.5 and 6.6 of the user manual.
+	 */
+
+	/* CPBInput */
+
+	struct {
+		union {
+		union nx_qw_t qw0;
+			struct {
+				uint32_t in_adler;            /* bits 0:31  */
+				uint32_t in_crc;              /* bits 32:63 */
+				union {
+					uint32_t in_histlen;  /* bits 64:75 */
+					uint32_t in_subc;     /* bits 93:95 */
+				};
+				union {
+					/* bits 108:111 */
+					uint32_t in_sfbt;
+					/* bits 112:127 */
+					uint32_t in_rembytecnt;
+					/* bits 116:127 */
+					uint32_t in_dhtlen;
+				};
+			};
+		};
+		union {
+			union nx_qw_t  in_dht[DHTSZ];	/* qw[1:18]     */
+			char in_dht_char[DHT_MAXSZ];	/* byte access  */
+		};
+		union nx_qw_t  reserved[5];		/* qw[19:23]    */
+	};
+
+	/* CPBOutput */
+
+	volatile struct {
+		union {
+			union nx_qw_t qw24;
+			struct {
+				uint32_t out_adler;    /* bits 0:31  qw[24] */
+				uint32_t out_crc;      /* bits 32:63 qw[24] */
+				union {
+					/* bits 77:79 qw[24] */
+					uint32_t out_tebc;
+					/* bits 80:95 qw[24] */
+					uint32_t out_subc;
+				};
+				union {
+					/* bits 108:111 qw[24] */
+					uint32_t out_sfbt;
+					/* bits 112:127 qw[24] */
+					uint32_t out_rembytecnt;
+					/* bits 116:127 qw[24] */
+					uint32_t out_dhtlen;
+				};
+			};
+		};
+		union {
+			union nx_qw_t  qw25[79];        /* qw[25:103] */
+			/* qw[25] compress no lzcounts or wrap */
+			uint32_t out_spbc_comp_wrap;
+			uint32_t out_spbc_wrap;         /* qw[25] wrap */
+			/* qw[25] compress no lzcounts */
+			uint32_t out_spbc_comp;
+			 /* 286 LL and 30 D symbol counts */
+			uint32_t out_lzcount[LLSZ+DSZ];
+			struct {
+				union nx_qw_t  out_dht[DHTSZ];  /* qw[25:42] */
+				/* qw[43] decompress */
+				uint32_t out_spbc_decomp;
+			};
+		};
+		/* qw[104] compress with lzcounts */
+		uint32_t out_spbc_comp_with_count;
+	};
+} __aligned(128);
+
+struct nx_gzip_crb_t {
+	union {                   /* byte[0:3]   */
+		uint32_t gzip_fc;     /* bits[24-31] */
+	};
+	uint32_t reserved1;       /* byte[4:7]   */
+	union {
+		uint64_t csb_address; /* byte[8:15]  */
+		struct {
+			uint32_t reserved2;
+			union {
+				uint32_t crb_c;
+				/* c==0 no ccb defined */
+
+				uint32_t crb_at;
+				/* at==0 address type is ignored;
+				 * all addrs effective assumed.
+				 */
+
+			};
+		};
+	};
+	struct nx_dde_t source_dde;           /* byte[16:31] */
+	struct nx_dde_t target_dde;           /* byte[32:47] */
+	volatile struct nx_ccb_t ccb;         /* byte[48:63] */
+	volatile union {
+		/* byte[64:239] shift csb by 128 bytes out of the crb; csb was
+		 * in crb earlier; JReilly says csb written with partial inject
+		 */
+		union nx_qw_t reserved64[11];
+		union stamped_crb_t stamp;       /* byte[64:79] */
+	};
+	volatile struct nx_csb_t csb;
+} __aligned(128);
+
+struct nx_gzip_crb_cpb_t {
+	struct nx_gzip_crb_t crb;
+	struct nx_gzip_cpb_t cpb;
+} __aligned(2048);
+
+
+/*
+ * NX hardware convention numbers the most significant (leftmost) bit 0.
+ * The defines below have *_offset defined as the right most bit
+ * position of a field.  x of size_mask(x) is the field width in bits.
+ */
+
+#define size_mask(x)          ((1U<<(x))-1)
+
+/*
+ * Offsets and Widths within the containing 32 bits of the various NX
+ * gzip hardware registers.  Use the getnn/putnn macros to access
+ * these regs
+ */
+
+#define dde_count_mask        size_mask(8)
+#define dde_count_offset      23
+
+/* CSB */
+
+#define csb_v_mask            size_mask(1)
+#define csb_v_offset          0
+#define csb_f_mask            size_mask(1)
+#define csb_f_offset          6
+#define csb_cs_mask           size_mask(8)
+#define csb_cs_offset         15
+#define csb_cc_mask           size_mask(8)
+#define csb_cc_offset         23
+#define csb_ce_mask           size_mask(8)
+#define csb_ce_offset         31
+
+/* CCB */
+
+#define ccb_cm_mask           size_mask(3)
+#define ccb_cm_offset         31
+
+/* VAS stamped CRB fields */
+
+#define vas_buf_num_mask      size_mask(6)
+#define vas_buf_num_offset    5
+#define send_wc_id_mask       size_mask(16)
+#define send_wc_id_offset     31
+#define recv_wc_id_mask       size_mask(16)
+#define recv_wc_id_offset     31
+#define vas_invalid_mask      size_mask(1)
+#define vas_invalid_offset    31
+
+/* NX stamped fault CRB fields */
+
+#define nxsf_t_mask           size_mask(1)
+#define nxsf_t_offset         23
+#define nxsf_fs_mask          size_mask(8)
+#define nxsf_fs_offset        31
+
+/* CPB input */
+
+#define in_histlen_mask       size_mask(12)
+#define in_histlen_offset     11
+#define in_dhtlen_mask        size_mask(12)
+#define in_dhtlen_offset      31
+#define in_subc_mask          size_mask(3)
+#define in_subc_offset        31
+#define in_sfbt_mask          size_mask(4)
+#define in_sfbt_offset        15
+#define in_rembytecnt_mask    size_mask(16)
+#define in_rembytecnt_offset  31
+
+/* CPB output */
+
+#define out_tebc_mask         size_mask(3)
+#define out_tebc_offset       15
+#define out_subc_mask         size_mask(16)
+#define out_subc_offset       31
+#define out_sfbt_mask         size_mask(4)
+#define out_sfbt_offset       15
+#define out_rembytecnt_mask   size_mask(16)
+#define out_rembytecnt_offset 31
+#define out_dhtlen_mask       size_mask(12)
+#define out_dhtlen_offset     31
+
+/* CRB */
+
+#define gzip_fc_mask          size_mask(8)
+#define gzip_fc_offset        31
+#define crb_c_mask            size_mask(1)
+#define crb_c_offset          28
+#define crb_at_mask           size_mask(1)
+#define crb_at_offset         30
+#define csb_address_mask      ~(15UL) /* mask off bottom 4b */
+
+/*
+ * Access macros for the registers.  Do not access registers directly
+ * because of the endian conversion.  P9 processor may run either as
+ * Little or Big endian. However the NX coprocessor regs are always
+ * big endian.
+ * Use the 32 and 64b macros to access respective
+ * register sizes.
+ * Use nn forms for the register fields shorter than 32 bits.
+ */
+
+#define getnn(ST, REG)      ((be32toh(ST.REG) >> (31-REG##_offset)) \
+				 & REG##_mask)
+#define getpnn(ST, REG)     ((be32toh((ST)->REG) >> (31-REG##_offset)) \
+				 & REG##_mask)
+#define get32(ST, REG)      (be32toh(ST.REG))
+#define getp32(ST, REG)     (be32toh((ST)->REG))
+#define get64(ST, REG)      (be64toh(ST.REG))
+#define getp64(ST, REG)     (be64toh((ST)->REG))
+
+#define unget32(ST, REG)    (get32(ST, REG) & ~((REG##_mask) \
+				<< (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define ungetp32(ST, REG)   (getp32(ST, REG) & ~((REG##_mask) \
+				<< (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define clear_regs(ST)      memset((void *)(&(ST)), 0, sizeof(ST))
+#define clear_dde(ST)       do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \
+				} while (0)
+#define clearp_dde(ST)      do { (ST)->dde_count = (ST)->ddebc = 0; \
+				 (ST)->ddead = 0; \
+				} while (0)
+#define clear_struct(ST)    memset((void *)(&(ST)), 0, sizeof(ST))
+#define putnn(ST, REG, X)   (ST.REG = htobe32(unget32(ST, REG) | (((X) \
+				 & REG##_mask) << (31-REG##_offset))))
+#define putpnn(ST, REG, X)  ((ST)->REG = htobe32(ungetp32(ST, REG) \
+				| (((X) & REG##_mask) << (31-REG##_offset))))
+
+#define put32(ST, REG, X)   (ST.REG = htobe32(X))
+#define putp32(ST, REG, X)  ((ST)->REG = htobe32(X))
+#define put64(ST, REG, X)   (ST.REG = htobe64(X))
+#define putp64(ST, REG, X)  ((ST)->REG = htobe64(X))
+
+/*
+ * Completion extension ce(0) ce(1) ce(2).  Bits ce(3-7)
+ * unused.  Section 6.6 Figure 6.7.
+ */
+
+#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
+#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
+#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))
+
+#define CSB_CE_PARTIAL         0x4
+#define CSB_CE_TERMINATE       0x2
+#define CSB_CE_TPBC_VALID      0x1
+
+#define csb_ce_termination(X)         (!!((X) & CSB_CE_TERMINATE))
+/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
+
+#define csb_ce_check_completion(X)    (!csb_ce_termination(X))
+/* if not terminated then check full or partial completion */
+
+#define csb_ce_partial_completion(X)  (!!((X) & CSB_CE_PARTIAL))
+#define csb_ce_full_completion(X)     (!csb_ce_partial_completion(X))
+#define csb_ce_tpbc_valid(X)          (!!((X) & CSB_CE_TPBC_VALID))
+/* TPBC indicates successfully stored data count */
+
+#define csb_ce_default_err(X)         csb_ce_termination(X)
+/* most error CEs have CE(0)=0 and CE(1)=1 */
+
+#define csb_ce_cc3_partial(X)         csb_ce_partial_completion(X)
+/* some CC=3 are partially completed, Table 6-8 */
+
+#define csb_ce_cc64(X)                (((X) & (CSB_CE_PARTIAL \
+					| CSB_CE_TERMINATE)) == 0)
+/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't compress
+ * smaller than source.  True when neither PARTIAL nor TERMINATE is set.
+ */
+
+/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */
+
+#define SFBT_BFINAL 0x1
+#define SFBT_LIT    0x4
+#define SFBT_FHT    0x5
+#define SFBT_DHT    0x6
+#define SFBT_HDR    0x7
+
+/*
+ * NX gzip function codes. Table 6.2.
+ * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
+ * select one of the two Byte Count Limits.
+ */
+
+#define GZIP_FC_LIMIT_MASK                               0x01
+#define GZIP_FC_COMPRESS_FHT                             0x00
+#define GZIP_FC_COMPRESS_DHT                             0x02
+#define GZIP_FC_COMPRESS_FHT_COUNT                       0x04
+#define GZIP_FC_COMPRESS_DHT_COUNT                       0x06
+#define GZIP_FC_COMPRESS_RESUME_FHT                      0x08
+#define GZIP_FC_COMPRESS_RESUME_DHT                      0x0a
+#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT                0x0c
+#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT                0x0e
+#define GZIP_FC_DECOMPRESS                               0x10
+#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND          0x12
+#define GZIP_FC_DECOMPRESS_RESUME                        0x14
+#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND   0x16
+#define GZIP_FC_WRAP                                     0x1e
+
+#define fc_is_compress(fc)  (((fc) & 0x10) == 0)
+#define fc_has_count(fc)    (fc_is_compress(fc) && (((fc) & 0x4) != 0))
+
+/* CSB.CC Error codes */
+
+#define ERR_NX_OK             0
+#define ERR_NX_ALIGNMENT      1
+#define ERR_NX_OPOVERLAP      2
+#define ERR_NX_DATA_LENGTH    3
+#define ERR_NX_TRANSLATION    5
+#define ERR_NX_PROTECTION     6
+#define ERR_NX_EXTERNAL_UE7   7
+#define ERR_NX_INVALID_OP     8
+#define ERR_NX_PRIVILEGE      9
+#define ERR_NX_INTERNAL_UE   10
+#define ERR_NX_EXTERN_UE_WR  12
+#define ERR_NX_TARGET_SPACE  13
+#define ERR_NX_EXCESSIVE_DDE 14
+#define ERR_NX_TRANSL_WR     15
+#define ERR_NX_PROTECT_WR    16
+#define ERR_NX_SUBFUNCTION   17
+#define ERR_NX_FUNC_ABORT    18
+#define ERR_NX_BYTE_MAX      19
+#define ERR_NX_CORRUPT_CRB   20
+#define ERR_NX_INVALID_CRB   21
+#define ERR_NX_INVALID_DDE   30
+#define ERR_NX_SEGMENTED_DDL 31
+#define ERR_NX_DDE_OVERFLOW  33
+#define ERR_NX_TPBC_GT_SPBC  64
+#define ERR_NX_MISSING_CODE  66
+#define ERR_NX_INVALID_DIST  67
+#define ERR_NX_INVALID_DHT   68
+#define ERR_NX_EXTERNAL_UE90 90
+#define ERR_NX_WDOG_TIMER   224
+#define ERR_NX_AT_FAULT     250
+#define ERR_NX_INTR_SERVER  252
+#define ERR_NX_UE253        253
+#define ERR_NX_NO_HW        254
+#define ERR_NX_HUNG_OP      255
+#define ERR_NX_END          256
+
+/* initial values for non-resume operations */
+#define INIT_CRC   0  /* crc32(0L, Z_NULL, 0) */
+#define INIT_ADLER 1  /* adler32(0L, Z_NULL, 0)  adler is initialized to 1 */
+
+/* prototypes */
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle);
+
+extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx);
+extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr);
+
+/* caller supplies a print buffer 4*sizeof(crb) */
+
+char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
+char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_prt_hex(void *cp, int sz, char *prbuf);
+char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_strerror(int e);
+
+#ifdef NX_SIM
+#include <stdio.h>
+int nx_sim_init(void *ctx);
+int nx_sim_end(void *ctx);
+int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
+#endif /* NX_SIM */
+
+/* Deflate stream manipulation */
+
+#define set_final_bit(x)	(x |= (unsigned char)1)
+#define clr_final_bit(x)	(x &= ~(unsigned char)1)
+
+#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
+					} while (0)
+/* append 10 bits 0000001b 00...... ;
+ * assumes appending starts on a byte boundary; b is the final bit.
+ */
+
+
+#ifdef NX_842
+
+/* 842 Engine */
+
+struct nx_eft_crb_t {
+	union {                   /* byte[0:3]   */
+		uint32_t eft_fc;      /* bits[29-31] */
+	};
+	uint32_t reserved1;       /* byte[4:7]   */
+	union {
+		uint64_t csb_address; /* byte[8:15]  */
+		struct {
+			uint32_t reserved2;
+			union {
+				uint32_t crb_c;
+				/* c==0 no ccb defined */
+
+				uint32_t crb_at;
+				/* at==0 address type is ignored;
+				 * all addrs effective assumed.
+				 */
+
+			};
+		};
+	};
+	struct nx_dde_t source_dde;           /* byte[16:31] */
+	struct nx_dde_t target_dde;           /* byte[32:47] */
+	struct nx_ccb_t ccb;                  /* byte[48:63] */
+	union {
+		union nx_qw_t reserved64[3];     /* byte[64:111] */
+	};
+	struct nx_csb_t csb;
+} __aligned(128);
+
+/* 842 CRB */
+
+#define EFT_FC_MASK                 size_mask(3)
+#define EFT_FC_OFFSET               31
+#define EFT_FC_COMPRESS             0x0
+#define EFT_FC_COMPRESS_WITH_CRC    0x1
+#define EFT_FC_DECOMPRESS           0x2
+#define EFT_FC_DECOMPRESS_WITH_CRC  0x3
+#define EFT_FC_BLK_DATA_MOVE        0x4
+#endif /* NX_842 */
+
+#endif /* _NXU_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
new file mode 120000
index 0000000..77fb4c7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/uapi/asm/vas-api.h
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
new file mode 100755
index 0000000..c7b46c5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+if [[ ! -w /dev/crypto/nx-gzip ]]; then
+	echo "Can't access /dev/crypto/nx-gzip, skipping"
+	echo "skip: $0"
+	exit 4
+fi
+
+set -e
+
+function cleanup
+{
+	rm -f nx-tempfile*
+}
+
+trap cleanup EXIT
+
+function test_sizes
+{
+	local n=$1
+	local fname="nx-tempfile.$n"
+
+	for size in 4K 64K 1M 64M
+	do
+		echo "Testing $size ($n) ..."
+		dd if=/dev/urandom of=$fname bs=$size count=1
+		./gzfht_test $fname
+		./gunz_test ${fname}.nx.gz
+	done
+}
+
+echo "Doing basic test of different sizes ..."
+test_sizes 0
+
+echo "Running tests in parallel ..."
+pids=""
+for i in {1..16}; do
+	test_sizes $i & pids="$pids $!"
+done
+# Bare "wait" always returns 0; wait per job so "set -e" aborts on a failure.
+for pid in $pids; do wait "$pid"; done
+
+echo "OK"
+
+exit 0
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
index e748f33..f69b1e2 100644
--- a/tools/testing/selftests/powerpc/pmu/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
 count_instructions
 l3_bank_test
 per_event_excludes
+count_stcx_fail
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 19046db..904672f 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -2,7 +2,7 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_GEN_PROGS := count_instructions count_stcx_fail l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
 
 top_srcdir = ../../../../..
@@ -13,8 +13,12 @@
 $(TEST_GEN_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
+$(OUTPUT)/count_instructions: CFLAGS += -m64
 $(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
-	$(CC) $(CFLAGS) -m64 -o $@ $^
+
+$(OUTPUT)/count_stcx_fail: CFLAGS += -m64
+$(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES)
+
 
 $(OUTPUT)/per_event_excludes: ../utils.c
 
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
new file mode 100644
index 0000000..2070a1e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+static void setup_event(struct event *e, u64 config, int type, char *name)
+{
+	event_init_opts(e, config, type, name);
+
+	e->attr.disabled = 1;
+	e->attr.exclude_kernel = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_idle = 1;
+}
+
+/* Run the ll/sc loop once; return 0 if the count is within tolerance of expected, else -1 */
+static int do_count_loop(struct event *events, u64 instructions, u64 overhead, bool report)
+{
+	s64 difference, expected;
+	double percentage;
+	u64 dummy;
+
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* 32 instructions per iteration, hence >> 5; a failed stcx. replays 10 */
+	thirty_two_instruction_loop_with_ll_sc(instructions >> 5, &dummy);
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	event_read(&events[0]);
+	event_read(&events[1]);
+	event_read(&events[2]);
+
+	expected = instructions + overhead + (events[2].result.value * 10);
+	difference = events[0].result.value - expected;
+	percentage = (double)difference / events[0].result.value * 100;
+
+	if (report) {
+		printf("-----\n");
+		event_report(&events[0]);
+		event_report(&events[1]);
+		event_report(&events[2]);
+
+		printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+		printf("Expected %lld\n", expected);
+		printf("Actual   %llu\n", events[0].result.value);
+		printf("Delta    %lld, %f%%\n", difference, percentage);
+	}
+
+	event_reset(&events[0]);
+	event_reset(&events[1]);
+	event_reset(&events[2]);
+
+	if (difference < 0)
+		difference = -difference;
+
+	/* Tolerate a difference below 0.0001 % */
+	difference *= 10000 * 100;
+	if (difference / events[0].result.value)
+		return -1;
+
+	return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static u64 determine_overhead(struct event *events)
+{
+	u64 current, overhead;
+	int i;
+
+	do_count_loop(events, 0, 0, false);
+	overhead = events[0].result.value;
+
+	for (i = 0; i < 100; i++) {
+		do_count_loop(events, 0, 0, false);
+		current = events[0].result.value;
+		if (current < overhead) {
+			printf("Replacing overhead %llu with %llu\n", overhead, current);
+			overhead = current;
+		}
+	}
+
+	return overhead;
+}
+
+#define	PM_MRK_STCX_FAIL	0x03e158
+#define PM_STCX_FAIL	0x01e058
+
+static int test_body(void)
+{
+	struct event events[3];
+	u64 overhead;
+
+	// The STCX_FAIL event we use works on Power8 or later
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+	setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
+	setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
+	setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
+
+	if (event_open(&events[0])) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	if (event_open_with_group(&events[1], events[0].fd)) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	if (event_open_with_group(&events[2], events[0].fd)) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	overhead = determine_overhead(events);
+	printf("Overhead of null loop: %llu instructions\n", overhead);
+
+	/* Run for 1 million instructions */
+	FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+	/* Run for 10 million instructions */
+	FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+	/* Run for 100 million instructions */
+	FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+	/* Run for 1 billion instructions */
+	FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+	/* Run for 16 billion instructions */
+	FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+	/* Run for 64 billion instructions */
+	FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+	event_close(&events[0]);
+	event_close(&events[1]);
+	event_close(&events[2]);	/* the stcx_fail event was opened above too */
+	return 0;
+}
+
+static int count_ll_sc(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(void)
+{
+	return test_harness(count_ll_sc, "count_ll_sc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
index 42bddbe..2920fb3 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 reg_access_test
 event_attributes_test
 cycles_test
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 23f4caf..af3df79 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -1,12 +1,18 @@
 # SPDX-License-Identifier: GPL-2.0
+include ../../../../../../scripts/Kbuild.include
+
 noarg:
 	$(MAKE) -C ../../
 
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
+TMPOUT = $(OUTPUT)/TMPDIR/
 # Toolchains may build PIE by default which breaks the assembly
-LDFLAGS += -no-pie
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+        $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += $(no-pie-option)
 
 TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 cycles_with_freeze_test pmc56_overflow_test		\
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
index 7c0fb5d..da2a3be 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/trace.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -18,7 +18,7 @@
 {
 	u8 type;
 	u8 length;
-	u8 data[0];
+	u8 data[];
 };
 
 struct trace_buffer
@@ -26,7 +26,7 @@
 	u64  size;
 	bool overflow;
 	void *tail;
-	u8   data[0];
+	u8   data[];
 };
 
 struct trace_buffer *trace_buffer_allocate(u64 size);
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
index a96d512..a5dfa9b 100644
--- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -20,6 +20,9 @@
 	char *p;
 	int i;
 
+	// The L3 bank logic is only used on Power8 or later
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	p = malloc(MALLOC_SIZE);
 	FAIL_IF(!p);
 
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d..bf1bec0 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
 #ifndef __SELFTESTS_POWERPC_PMU_LIB_H
 #define __SELFTESTS_POWERPC_PMU_LIB_H
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <string.h>
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
index 8cc9b5e..c52ba09 100644
--- a/tools/testing/selftests/powerpc/pmu/loop.S
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -41,3 +41,38 @@
 	subi	r3,r3,1
 	b	FUNC_NAME(thirty_two_instruction_loop)
 FUNC_END(thirty_two_instruction_loop)
+
+FUNC_START(thirty_two_instruction_loop_with_ll_sc)
+	cmpdi	r3,0
+	beqlr
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 5
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+1:	ldarx	r6,0,r4		# 10
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 15
+	addi	r5,r5,1
+	addi	r5,r5,1
+	stdcx.	r6,0,r4
+	bne-	1b
+	addi	r5,r5,1		# 20
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 25
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 30
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop_with_ll_sc)
+FUNC_END(thirty_two_instruction_loop_with_ll_sc)
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2756fe2..ad32a09 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -23,12 +23,9 @@
 static int per_event_excludes(void)
 {
 	struct event *e, events[4];
-	char *platform;
 	int i;
 
-	platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
-	FAIL_IF(!platform);
-	SKIP_IF(strcmp(platform, "power8") != 0);
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
 
 	/*
 	 * We need to create the events disabled, otherwise the running/enabled
diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore
index 4cc4e31..1e5c04e 100644
--- a/tools/testing/selftests/powerpc/primitives/.gitignore
+++ b/tools/testing/selftests/powerpc/primitives/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 load_unaligned_zeropad
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index dce19f2..0e96150 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ptrace-gpr
 ptrace-tm-gpr
 ptrace-tm-spd-gpr
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fe..bbc05ff 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@
 	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
 	       user_write, info->amr, pkey1, pkey2, pkey3);
 
-	mtspr(SPRN_AMR, info->amr);
+	set_amr(info->amr);
 
 	/*
 	 * We won't use pkey3. This tests whether the kernel restores the UAMOR
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index 200337d..c1f324a 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -148,6 +148,121 @@
 	return 0;
 }
 
+static int runtest_dar_outside(void)
+{
+	void *target;
+	volatile __u16 temp16;
+	volatile __u64 temp64;
+	struct perf_event_attr attr;
+	int break_fd;
+	unsigned long long breaks;
+	int fail = 0;
+	size_t res;
+
+	target = malloc(8);
+	if (!target) {
+		perror("malloc failed");
+		exit(EXIT_FAILURE);
+	}
+
+	/* One user-space RW breakpoint; each access below probes one edge of it */
+	memset(&attr, 0, sizeof(attr));
+	attr.disabled = 1;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.exclude_kernel = 1;
+	attr.exclude_hv = 1;
+	attr.exclude_guest = 1;
+	attr.bp_type = HW_BREAKPOINT_RW;
+	/* watch the middle half of the 8-byte target: bytes 2..5 */
+	attr.bp_addr = (__u64)(target + 2);
+	attr.bp_len = 4;
+	break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (break_fd < 0) {
+		perror("sys_perf_event_open");	/* report before free() can touch errno */
+		free(target);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Shouldn't hit: bytes 0..1 lie entirely below the watched range. */
+	ioctl(break_fd, PERF_EVENT_IOC_RESET);
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+	temp16 = *((__u16 *)target);
+	*((__u16 *)target) = temp16;
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	if (breaks == 0) {
+		printf("TESTED: No overlap\n");
+	} else {
+		printf("FAILED: No overlap: %lld != 0\n", breaks);
+		fail = 1;
+	}
+
+	/* Hit (one load + one store): bytes 1..2 straddle the range start. */
+	ioctl(break_fd, PERF_EVENT_IOC_RESET);
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+	temp16 = *((__u16 *)(target + 1));
+	*((__u16 *)(target + 1)) = temp16;
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	if (breaks == 2) {
+		printf("TESTED: Partial overlap\n");
+	} else {
+		printf("FAILED: Partial overlap: %lld != 2\n", breaks);
+		fail = 1;
+	}
+
+	/* Hit (one load + one store): bytes 5..6 straddle the range end. */
+	ioctl(break_fd, PERF_EVENT_IOC_RESET);
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+	temp16 = *((__u16 *)(target + 5));
+	*((__u16 *)(target + 5)) = temp16;
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	if (breaks == 2) {
+		printf("TESTED: Partial overlap\n");
+	} else {
+		printf("FAILED: Partial overlap: %lld != 2\n", breaks);
+		fail = 1;
+	}
+
+	/* Shouldn't hit: bytes 6..7 lie entirely above the watched range. */
+	ioctl(break_fd, PERF_EVENT_IOC_RESET);
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+	temp16 = *((__u16 *)(target + 6));
+	*((__u16 *)(target + 6)) = temp16;
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	if (breaks == 0) {
+		printf("TESTED: No overlap\n");
+	} else {
+		printf("FAILED: No overlap: %lld != 0\n", breaks);
+		fail = 1;
+	}
+
+	/* Hit (one load + one store): bytes 0..7 cover the whole range. */
+	ioctl(break_fd, PERF_EVENT_IOC_RESET);
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+	temp64 = *((__u64 *)target);
+	*((__u64 *)target) = temp64;
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	if (breaks == 2) {
+		printf("TESTED: Full overlap\n");
+	} else {
+		printf("FAILED: Full overlap: %lld != 2\n", breaks);
+		fail = 1;
+	}
+
+	free(target);
+	close(break_fd);
+	return fail;
+}
+
 static int runtest(void)
 {
 	int rwflag;
@@ -172,7 +287,9 @@
 				return ret;
 		}
 	}
-	return 0;
+
+	ret = runtest_dar_outside();
+	return ret;
 }
 
 
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 3066d31..2e0d86e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -20,323 +20,531 @@
 #include <signal.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <sys/syscall.h>
+#include <linux/limits.h>
 #include "ptrace.h"
 
-/* Breakpoint access modes */
-enum {
-	BP_X = 1,
-	BP_RW = 2,
-	BP_W = 4,
+#define SPRN_PVR	0x11F
+#define PVR_8xx		0x00500000
+
+bool is_8xx;
+
+/*
+ * Use volatile on all global variables so that the compiler doesn't
+ * optimise away their loads/stores. Otherwise the selftest can fail.
+ */
+static volatile __u64 glvar;
+
+#define DAWR_MAX_LEN 512
+static volatile __u8 big_var[DAWR_MAX_LEN] __attribute__((aligned(512)));
+
+#define A_LEN 6
+#define B_LEN 6
+struct gstruct {
+	__u8 a[A_LEN]; /* double word aligned */
+	__u8 b[B_LEN]; /* double word unaligned */
 };
+static volatile struct gstruct gstruct __attribute__((aligned(512)));
 
-static pid_t child_pid;
-static struct ppc_debug_info dbginfo;
+static volatile char cwd[PATH_MAX] __attribute__((aligned(8)));
 
-static void get_dbginfo(void)
+static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
 {
-	int ret;
-
-	ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
-	if (ret) {
-		perror("Can't get breakpoint info\n");
+	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
+		perror("Can't get breakpoint info");
 		exit(-1);
 	}
 }
 
-static bool hwbreak_present(void)
+static bool dawr_present(struct ppc_debug_info *dbginfo)
 {
-	return (dbginfo.num_data_bps != 0);
+	return !!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
 }
 
-static bool dawr_present(void)
-{
-	return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
-}
-
-static void set_breakpoint_addr(void *addr)
-{
-	int ret;
-
-	ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
-	if (ret) {
-		perror("Can't set breakpoint addr\n");
-		exit(-1);
-	}
-}
-
-static int set_hwbreakpoint_addr(void *addr, int range)
-{
-	int ret;
-
-	struct ppc_hw_breakpoint info;
-
-	info.version = 1;
-	info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
-	info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
-	if (range > 0)
-		info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
-	info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
-	info.addr = (__u64)addr;
-	info.addr2 = (__u64)addr + range;
-	info.condition_value = 0;
-
-	ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
-	if (ret < 0) {
-		perror("Can't set breakpoint\n");
-		exit(-1);
-	}
-	return ret;
-}
-
-static int del_hwbreakpoint_addr(int watchpoint_handle)
-{
-	int ret;
-
-	ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
-	if (ret < 0) {
-		perror("Can't delete hw breakpoint\n");
-		exit(-1);
-	}
-	return ret;
-}
-
-#define DAWR_LENGTH_MAX 512
-
-/* Dummy variables to test read/write accesses */
-static unsigned long long
-	dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
-	__attribute__((aligned(512)));
-static unsigned long long *dummy_var = dummy_array;
-
 static void write_var(int len)
 {
-	long long *plval;
-	char *pcval;
-	short *psval;
-	int *pival;
+	__u8 *pcvar;
+	__u16 *psvar;
+	__u32 *pivar;
+	__u64 *plvar;
 
 	switch (len) {
 	case 1:
-		pcval = (char *)dummy_var;
-		*pcval = 0xff;
+		pcvar = (__u8 *)&glvar;
+		*pcvar = 0xff;
 		break;
 	case 2:
-		psval = (short *)dummy_var;
-		*psval = 0xffff;
+		psvar = (__u16 *)&glvar;
+		*psvar = 0xffff;
 		break;
 	case 4:
-		pival = (int *)dummy_var;
-		*pival = 0xffffffff;
+		pivar = (__u32 *)&glvar;
+		*pivar = 0xffffffff;
 		break;
 	case 8:
-		plval = (long long *)dummy_var;
-		*plval = 0xffffffffffffffffLL;
+		plvar = (__u64 *)&glvar;
+		*plvar = 0xffffffffffffffffLL;
 		break;
 	}
 }
 
 static void read_var(int len)
 {
-	char cval __attribute__((unused));
-	short sval __attribute__((unused));
-	int ival __attribute__((unused));
-	long long lval __attribute__((unused));
+	__u8 cvar __attribute__((unused));
+	__u16 svar __attribute__((unused));
+	__u32 ivar __attribute__((unused));
+	__u64 lvar __attribute__((unused));
 
 	switch (len) {
 	case 1:
-		cval = *(char *)dummy_var;
+		cvar = (__u8)glvar;
 		break;
 	case 2:
-		sval = *(short *)dummy_var;
+		svar = (__u16)glvar;
 		break;
 	case 4:
-		ival = *(int *)dummy_var;
+		ivar = (__u32)glvar;
 		break;
 	case 8:
-		lval = *(long long *)dummy_var;
+		lvar = (__u64)glvar;
 		break;
 	}
 }
 
-/*
- * Do the r/w accesses to trigger the breakpoints. And run
- * the usual traps.
- */
-static void trigger_tests(void)
+static void test_workload(void)
 {
-	int len, ret;
+	__u8 cvar __attribute__((unused));
+	__u32 ivar __attribute__((unused));
+	int len = 0;
 
-	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
-	if (ret) {
-		perror("Can't be traced?\n");
-		return;
+	if (ptrace(PTRACE_TRACEME, 0, NULL, 0)) {
+		perror("Child can't be traced?");
+		exit(-1);
 	}
 
 	/* Wake up father so that it sets up the first test */
 	kill(getpid(), SIGUSR1);
 
-	/* Test write watchpoints */
-	for (len = 1; len <= sizeof(long); len <<= 1)
+	/* PTRACE_SET_DEBUGREG, WO test */
+	for (len = 1; len <= sizeof(glvar); len <<= 1)
 		write_var(len);
 
-	/* Test read/write watchpoints (on read accesses) */
-	for (len = 1; len <= sizeof(long); len <<= 1)
+	/* PTRACE_SET_DEBUGREG, RO test */
+	for (len = 1; len <= sizeof(glvar); len <<= 1)
 		read_var(len);
 
-	/* Test when breakpoint is unset */
+	/* PTRACE_SET_DEBUGREG, RW test */
+	for (len = 1; len <= sizeof(glvar); len <<= 1) {
+		if (rand() % 2)
+			read_var(len);
+		else
+			write_var(len);
+	}
 
-	/* Test write watchpoints */
-	for (len = 1; len <= sizeof(long); len <<= 1)
-		write_var(len);
+	/* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+	syscall(__NR_getcwd, &cwd, PATH_MAX);
 
-	/* Test read/write watchpoints (on read accesses) */
-	for (len = 1; len <= sizeof(long); len <<= 1)
-		read_var(len);
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
+	write_var(1);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */
+	read_var(1);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */
+	if (rand() % 2)
+		write_var(1);
+	else
+		read_var(1);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+	syscall(__NR_getcwd, &cwd, PATH_MAX);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
+	gstruct.a[rand() % A_LEN] = 'a';
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */
+	cvar = gstruct.a[rand() % A_LEN];
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */
+	if (rand() % 2)
+		gstruct.a[rand() % A_LEN] = 'a';
+	else
+		cvar = gstruct.a[rand() % A_LEN];
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */
+	gstruct.b[rand() % B_LEN] = 'b';
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */
+	cvar = gstruct.b[rand() % B_LEN];
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */
+	if (rand() % 2)
+		gstruct.b[rand() % B_LEN] = 'b';
+	else
+		cvar = gstruct.b[rand() % B_LEN];
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */
+	if (rand() % 2)
+		*((int *)(gstruct.a + 4)) = 10;
+	else
+		ivar = *((int *)(gstruct.a + 4));
+
+	/* PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW test */
+	if (rand() % 2)
+		big_var[rand() % DAWR_MAX_LEN] = 'a';
+	else
+		cvar = big_var[rand() % DAWR_MAX_LEN];
 }
 
-static void check_success(const char *msg)
+static void check_success(pid_t child_pid, const char *name, const char *type,
+			  unsigned long saddr, int len)
 {
-	const char *msg2;
 	int status;
+	siginfo_t siginfo;
+	unsigned long eaddr = (saddr + len - 1) | 0x7;
+
+	saddr &= ~0x7;
 
 	/* Wait for the child to SIGTRAP */
 	wait(&status);
 
-	msg2 = "Failed";
+	ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &siginfo);
 
-	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
-		msg2 = "Child process hit the breakpoint";
+	if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP ||
+	    (unsigned long)siginfo.si_addr < saddr ||
+	    (unsigned long)siginfo.si_addr > eaddr) {
+		printf("%s, %s, len: %d: Fail\n", name, type, len);
+		exit(-1);
 	}
 
-	printf("%s Result: [%s]\n", msg, msg2);
+	printf("%s, %s, len: %d: Ok\n", name, type, len);
+
+	if (!is_8xx) {
+		/*
+		 * For ptrace registered watchpoint, signal is generated
+		 * before executing load/store. Singlestep the instruction
+		 * and then continue the test.
+		 */
+		ptrace(PTRACE_SINGLESTEP, child_pid, NULL, 0);
+		wait(NULL);
+	}
 }
 
-static void launch_watchpoints(char *buf, int mode, int len,
-			       struct ppc_debug_info *dbginfo, bool dawr)
+static void ptrace_set_debugreg(pid_t child_pid, unsigned long wp_addr)
 {
-	const char *mode_str;
-	unsigned long data = (unsigned long)(dummy_var);
-	int wh, range;
+	if (ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, wp_addr)) {
+		perror("PTRACE_SET_DEBUGREG failed");
+		exit(-1);
+	}
+}
 
-	data &= ~0x7UL;
+static int ptrace_sethwdebug(pid_t child_pid, struct ppc_hw_breakpoint *info)
+{
+	int wh = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, info);
 
-	if (mode == BP_W) {
-		data |= (1UL << 1);
-		mode_str = "write";
-	} else {
-		data |= (1UL << 0);
-		data |= (1UL << 1);
-		mode_str = "read";
+	if (wh <= 0) {
+		perror("PPC_PTRACE_SETHWDEBUG failed");
+		exit(-1);
+	}
+	return wh;
+}
+
+static void ptrace_delhwdebug(pid_t child_pid, int wh)
+{
+	if (ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, wh) < 0) {
+		perror("PPC_PTRACE_DELHWDEBUG failed");
+		exit(-1);
+	}
+}
+
+#define DABR_READ_SHIFT		0
+#define DABR_WRITE_SHIFT	1
+#define DABR_TRANSLATION_SHIFT	2
+
+static int test_set_debugreg(pid_t child_pid)
+{
+	unsigned long wp_addr = (unsigned long)&glvar;
+	char *name = "PTRACE_SET_DEBUGREG";
+	int len;
+
+	/* PTRACE_SET_DEBUGREG, WO test */
+	wp_addr &= ~0x7UL;
+	wp_addr |= (1UL << DABR_WRITE_SHIFT);
+	wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+	for (len = 1; len <= sizeof(glvar); len <<= 1) {
+		ptrace_set_debugreg(child_pid, wp_addr);
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		check_success(child_pid, name, "WO", wp_addr, len);
 	}
 
-	/* Set DABR_TRANSLATION bit */
-	data |= (1UL << 2);
+	/* PTRACE_SET_DEBUGREG, RO test */
+	wp_addr &= ~0x7UL;
+	wp_addr |= (1UL << DABR_READ_SHIFT);
+	wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+	for (len = 1; len <= sizeof(glvar); len <<= 1) {
+		ptrace_set_debugreg(child_pid, wp_addr);
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		check_success(child_pid, name, "RO", wp_addr, len);
+	}
 
-	/* use PTRACE_SET_DEBUGREG breakpoints */
-	set_breakpoint_addr((void *)data);
+	/* PTRACE_SET_DEBUGREG, RW test */
+	wp_addr &= ~0x7UL;
+	wp_addr |= (1Ul << DABR_READ_SHIFT);
+	wp_addr |= (1UL << DABR_WRITE_SHIFT);
+	wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+	for (len = 1; len <= sizeof(glvar); len <<= 1) {
+		ptrace_set_debugreg(child_pid, wp_addr);
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		check_success(child_pid, name, "RW", wp_addr, len);
+	}
+
+	ptrace_set_debugreg(child_pid, 0);
+	return 0;
+}
+
+static int test_set_debugreg_kernel_userspace(pid_t child_pid)
+{
+	unsigned long wp_addr = (unsigned long)cwd;
+	char *name = "PTRACE_SET_DEBUGREG";
+
+	/* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+	wp_addr &= ~0x7UL;
+	wp_addr |= (1Ul << DABR_READ_SHIFT);
+	wp_addr |= (1UL << DABR_WRITE_SHIFT);
+	wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+	ptrace_set_debugreg(child_pid, wp_addr);
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
-	check_success(buf);
-	/* Unregister hw brkpoint */
-	set_breakpoint_addr(NULL);
+	check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8);
 
-	data = (data & ~7); /* remove dabr control bits */
+	ptrace_set_debugreg(child_pid, 0);
+	return 0;
+}
 
-	/* use PPC_PTRACE_SETHWDEBUG breakpoint */
-	if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
-		return; /* not supported */
-	wh = set_hwbreakpoint_addr((void *)data, 0);
+static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
+				  unsigned long addr, int len)
+{
+	info->version = 1;
+	info->trigger_type = type;
+	info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+	info->addr = (__u64)addr;
+	info->addr2 = (__u64)addr + len;
+	info->condition_value = 0;
+	if (!len)
+		info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+	else
+		info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+}
+
+static void test_sethwdebug_exact(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info;
+	unsigned long wp_addr = (unsigned long)&glvar;
+	char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+	int len = 1; /* hardcoded in kernel */
+	int wh;
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+	wh = ptrace_sethwdebug(child_pid, &info);
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
-	check_success(buf);
-	/* Unregister hw brkpoint */
-	del_hwbreakpoint_addr(wh);
+	check_success(child_pid, name, "WO", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
 
-	/* try a wider range */
-	range = 8;
-	if (dawr)
-		range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
-	wh = set_hwbreakpoint_addr((void *)data, range);
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, 0);
+	wh = ptrace_sethwdebug(child_pid, &info);
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
-	check_success(buf);
-	/* Unregister hw brkpoint */
-	del_hwbreakpoint_addr(wh);
+	check_success(child_pid, name, "RO", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, 0);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RW", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+}
+
+static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info;
+	unsigned long wp_addr = (unsigned long)&cwd;
+	char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+	int len = 1; /* hardcoded in kernel */
+	int wh;
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+}
+
+static void test_sethwdebug_range_aligned(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info;
+	unsigned long wp_addr;
+	char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED";
+	int len;
+	int wh;
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
+	wp_addr = (unsigned long)&gstruct.a;
+	len = A_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "WO", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */
+	wp_addr = (unsigned long)&gstruct.a;
+	len = A_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RO", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */
+	wp_addr = (unsigned long)&gstruct.a;
+	len = A_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RW", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+}
+
+static void test_sethwdebug_range_unaligned(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info;
+	unsigned long wp_addr;
+	char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED";
+	int len;
+	int wh;
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */
+	wp_addr = (unsigned long)&gstruct.b;
+	len = B_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "WO", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */
+	wp_addr = (unsigned long)&gstruct.b;
+	len = B_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RO", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */
+	wp_addr = (unsigned long)&gstruct.b;
+	len = B_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RW", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+
+}
+
+static void test_sethwdebug_range_unaligned_dar(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info;
+	unsigned long wp_addr;
+	char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE";
+	int len, wh;
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */
+	/* The child randomly issues a load or a store here, so trap on both. */
+	wp_addr = (unsigned long)&gstruct.b;
+	len = B_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RW", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+}
+
+static void test_sethwdebug_dawr_max_range(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info;
+	unsigned long wp_addr;
+	char *name = "PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN";
+	int len;
+	int wh;
+
+	/* PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW test */
+	wp_addr = (unsigned long)big_var;
+	len = DAWR_MAX_LEN;
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RW", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
 }
 
 /* Set the breakpoints and check the child successfully trigger them */
-static int launch_tests(bool dawr)
+static void
+run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
 {
-	char buf[1024];
-	int len, i, status;
-
-	struct ppc_debug_info dbginfo;
-
-	i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
-	if (i) {
-		perror("Can't set breakpoint info\n");
-		exit(-1);
+	test_set_debugreg(child_pid);
+	test_set_debugreg_kernel_userspace(child_pid);
+	test_sethwdebug_exact(child_pid);
+	test_sethwdebug_exact_kernel_userspace(child_pid);
+	if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
+		test_sethwdebug_range_aligned(child_pid);
+		if (dawr || is_8xx) {
+			test_sethwdebug_range_unaligned(child_pid);
+			test_sethwdebug_range_unaligned_dar(child_pid);
+			test_sethwdebug_dawr_max_range(child_pid);
+		}
 	}
-	if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
-		printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
-
-	/* Write watchpoint */
-	for (len = 1; len <= sizeof(long); len <<= 1)
-		launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
-
-	/* Read-Write watchpoint */
-	for (len = 1; len <= sizeof(long); len <<= 1)
-		launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
-
-	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-
-	/*
-	 * Now we have unregistered the breakpoint, access by child
-	 * should not cause SIGTRAP.
-	 */
-
-	wait(&status);
-
-	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
-		printf("FAIL: Child process hit the breakpoint, which is not expected\n");
-		ptrace(PTRACE_CONT, child_pid, NULL, 0);
-		return TEST_FAIL;
-	}
-
-	if (WIFEXITED(status))
-		printf("Child exited normally\n");
-
-	return TEST_PASS;
 }
 
 static int ptrace_hwbreak(void)
 {
-	pid_t pid;
-	int ret;
+	pid_t child_pid;
+	struct ppc_debug_info dbginfo;
 	bool dawr;
 
-	pid = fork();
-	if (!pid) {
-		trigger_tests();
+	child_pid = fork();
+	if (!child_pid) {
+		test_workload();
 		return 0;
 	}
 
 	wait(NULL);
 
-	child_pid = pid;
+	get_dbginfo(child_pid, &dbginfo);
+	SKIP_IF(dbginfo.num_data_bps == 0);
 
-	get_dbginfo();
-	SKIP_IF(!hwbreak_present());
-	dawr = dawr_present();
+	dawr = dawr_present(&dbginfo);
+	run_tests(child_pid, &dbginfo, dawr);
 
-	ret = launch_tests(dawr);
-
+	/* Let the child exit first. */
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
 	wait(NULL);
 
-	return ret;
+	/*
+	 * Every testcase exits immediately with -1 on any failure. If
+	 * execution has reached here, all tests were successful.
+	 */
+	return TEST_PASS;
 }
 
 int main(int argc, char **argv, char **envp)
 {
+	int pvr = 0;
+	asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR));
+	if (pvr == PVR_8xx)
+		is_8xx = true;
+
 	return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
 }
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index 3694613..bc454f8 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -125,7 +125,7 @@
 	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
 	       user_write, info->amr1, pkey1, pkey2, pkey3);
 
-	mtspr(SPRN_AMR, info->amr1);
+	set_amr(info->amr1);
 
 	/* Wait for parent to read our AMR value and write a new one. */
 	ret = prod_parent(&info->child_sync);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 58cb1a8..4436ca9 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -78,6 +78,9 @@
 	pid_t pid;
 	int ret, status;
 
+	// TAR was added in v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index c4fe0e8..cb9875f 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,6 +61,8 @@
 	pid_t pid;
 	int ret, status, i;
 
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 
 	for (i = 0; i < VEC_MAX; i++)
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index b8afb4f..4257a1f 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 rfi_flush
 entry_flush
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index e550a28..f25e854 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+
 
-TEST_GEN_PROGS := rfi_flush entry_flush
+TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
@@ -8,3 +8,8 @@
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/spectre_v2: CFLAGS += -m64
+$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
+$(OUTPUT)/rfi_flush: flush_utils.c
+$(OUTPUT)/entry_flush: flush_utils.c
diff --git a/tools/testing/selftests/powerpc/security/branch_loops.S b/tools/testing/selftests/powerpc/security/branch_loops.S
new file mode 100644
index 0000000..22e9204
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/branch_loops.S
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2019, Michael Ellerman, IBM Corp.
+ */
+
+#include <ppc-asm.h>
+
+	.data
+
+jump_table:
+	.long	0x0
+	.long	(.Lstate_1 - .Lstate_0)
+	.long	(.Lstate_2 - .Lstate_0)
+	.long	(.Lstate_3 - .Lstate_0)
+	.long	(.Lstate_4 - .Lstate_0)
+	.long	(.Lstate_5 - .Lstate_0)
+	.long	(.Lstate_6 - .Lstate_0)
+	.long	(.Lstate_7 - .Lstate_0)
+
+	.text
+
+#define ITER_SHIFT	31
+
+.macro state number
+	.balign	32
+.Lstate_\number:
+	.if	\number==7
+	li	r3, 0
+	.else
+	li	r3, \number+1
+	.endif
+	b	.Lloop
+.endm
+
+FUNC_START(pattern_cache_loop)
+	li	r3, 0
+	li	r4, 1
+	sldi	r4, r4, ITER_SHIFT
+
+.Lloop:	cmpdi	r4, 0
+	beqlr
+
+	addi	r4, r4, -1
+
+	ld	r6, jump_table@got(%r2)
+	sldi	r5, r3, 2
+	lwax	r6, r5, r6
+	ld	r7, .Lstate_0@got(%r2)
+	add	r6, r6, r7
+	mtctr	r6
+	bctr
+
+	state	0
+	state	1
+	state	2
+	state	3
+	state	4
+	state	5
+	state	6
+	state	7
+
+FUNC_END(pattern_cache_loop)
+
+
+FUNC_START(indirect_branch_loop)
+	li	r3, 1
+	sldi	r3, r3, ITER_SHIFT
+
+1:	cmpdi	r3, 0
+	beqlr
+
+	addi	r3, r3, -1
+
+	ld	r4, 2f@got(%r2)
+	mtctr	r4
+	bctr
+
+	.balign 32
+2:	b	1b
+
+FUNC_END(indirect_branch_loop)
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
index e8d24f9..68ce377 100644
--- a/tools/testing/selftests/powerpc/security/entry_flush.c
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -15,32 +15,7 @@
 #include <string.h>
 #include <stdio.h>
 #include "utils.h"
-
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
-	__u64 nr;
-	__u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
-	__u64 tmp;
-
-	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
-	return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
-		  unsigned long zero_size)
-{
-	for (unsigned long i = 0; i < iterations; i++) {
-		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
-			load(p + j);
-		getppid();
-	}
-}
+#include "flush_utils.h"
 
 int entry_flush_test(void)
 {
@@ -78,7 +53,7 @@
 
 	entry_flush = entry_flush_orig;
 
-	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
 	FAIL_IF(fd < 0);
 
 	p = (char *)memalign(zero_size, CACHELINE_SIZE);
@@ -120,12 +95,13 @@
 		       repetitions * l1d_misses_expected / 2,
 		       repetitions - passes, repetitions);
 		rc = 1;
-	} else
+	} else {
 		printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n",
 		       entry_flush, l1d_misses_total, entry_flush ? '>' : '<',
 		       entry_flush ? repetitions * l1d_misses_expected :
 		       repetitions * l1d_misses_expected / 2,
 		       passes, repetitions);
+	}
 
 	if (entry_flush == entry_flush_orig) {
 		entry_flush = !entry_flush_orig;
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
new file mode 100644
index 0000000..0c3c4c4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+static inline __u64 load(void *addr)
+{
+	__u64 tmp;
+
+	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+	return tmp;
+}
+
+void syscall_loop(char *p, unsigned long iterations,
+		  unsigned long zero_size)
+{
+	for (unsigned long i = 0; i < iterations; i++) {
+		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+			load(p + j);
+		getppid();
+	}
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+	static int warned;
+	ucontext_t *ctx = (ucontext_t *)unused;
+	unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+	/* Match "mtspr 3,RS" (RS masked out): the move to DSCR in set_dscr() */
+	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+		if (!warned++)
+			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+		*pc += 4;	/* step over the 4-byte instruction and resume */
+	} else {
+		printf("SIGILL at %p\n", (void *)*pc);	/* faulting address, not the slot holding it */
+		abort();
+	}
+}
+
+void set_dscr(unsigned long val)
+{
+	static int init;
+	struct sigaction sa;
+
+	if (!init) {
+		memset(&sa, 0, sizeof(sa));
+		sa.sa_sigaction = sigill_handler;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGILL, &sa, NULL))
+			perror("sigill_handler");
+		init = 1;
+	}
+	/* If this mtspr raises SIGILL, sigill_handler warns once and skips it. */
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h
new file mode 100644
index 0000000..7a3d602
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+
+/* Stride, in bytes, between the loads issued by syscall_loop() */
+#define CACHELINE_SIZE 128
+
+/*
+ * perf_event config word selecting L1D / read / miss, built from the
+ * PERF_COUNT_HW_CACHE_* constants.  Those constants are not declared here;
+ * the including file must already have them in scope.
+ */
+#define PERF_L1D_READ_MISS_CONFIG	((PERF_COUNT_HW_CACHE_L1D) | 		\
+					(PERF_COUNT_HW_CACHE_OP_READ << 8) |	\
+					(PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
+
+/*
+ * Loop 'iterations' times, each time loading from p in CACHELINE_SIZE steps
+ * across zero_size bytes and then making a system call.
+ */
+void syscall_loop(char *p, unsigned long iterations,
+		  unsigned long zero_size);
+
+/* Write val to the DSCR special purpose register. */
+void set_dscr(unsigned long val);
+
+#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 533315e..f73484a 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -14,32 +14,8 @@
 #include <string.h>
 #include <stdio.h>
 #include "utils.h"
+#include "flush_utils.h"
 
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
-	__u64 nr;
-	__u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
-	__u64 tmp;
-
-	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
-	return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
-			 unsigned long zero_size)
-{
-	for (unsigned long i = 0; i < iterations; i++) {
-		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
-			load(p + j);
-		getppid();
-	}
-}
 
 int rfi_flush_test(void)
 {
@@ -55,6 +31,9 @@
 
 	SKIP_IF(geteuid() != 0);
 
+	// The PMU event we use only works on Power7 or later
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
 	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
 		perror("Unable to read powerpc/rfi_flush debugfs file");
 		SKIP_IF(1);
@@ -75,7 +54,7 @@
 
 	rfi_flush = rfi_flush_orig;
 
-	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
 	FAIL_IF(fd < 0);
 
 	p = (char *)memalign(zero_size, CACHELINE_SIZE);
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
new file mode 100644
index 0000000..83647b8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018-2019 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include "utils.h"
+
+#include "../pmu/event.h"
+
+
+extern void pattern_cache_loop(void);
+extern void indirect_branch_loop(void);
+
+/*
+ * Run the branch workload with the perf events enabled and compute what
+ * percentage of the counted predictions were mispredicted.
+ *
+ * events[0] and events[1] supply the predicted and mispredicted counts; when
+ * is_p9 is set, events[2] and events[3] are added to those totals.  The
+ * result is stored in *miss_percent and 0 is returned.  The FAIL_IF() checks
+ * bail out if an event was not running for the whole time it was enabled,
+ * or if no predictions were counted.
+ */
+static int do_count_loop(struct event *events, bool is_p9, s64 *miss_percent)
+{
+	u64 pred, mpred;
+
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	if (is_p9)
+		pattern_cache_loop();
+	else
+		indirect_branch_loop();
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	event_read(&events[0]);
+	event_read(&events[1]);
+
+	// We could scale all the events by running/enabled but we're lazy
+	// As long as the PMU is uncontended they should all run
+	FAIL_IF(events[0].result.running != events[0].result.enabled);
+	FAIL_IF(events[1].result.running != events[1].result.enabled);
+
+	pred =  events[0].result.value;
+	mpred = events[1].result.value;
+
+	if (is_p9) {
+		event_read(&events[2]);
+		event_read(&events[3]);
+		FAIL_IF(events[2].result.running != events[2].result.enabled);
+		FAIL_IF(events[3].result.running != events[3].result.enabled);
+
+		pred  += events[2].result.value;
+		mpred += events[3].result.value;
+	}
+
+	// A zero total gives no meaningful ratio and would divide by zero
+	FAIL_IF(pred == 0);
+
+	*miss_percent = 100 * mpred / pred;
+
+	return 0;
+}
+
+/*
+ * Initialise e as a named event for the given config, created disabled and
+ * excluding kernel, hypervisor and idle.
+ */
+static void setup_event(struct event *e, u64 config, char *name)
+{
+	event_init_named(e, config, name);
+
+	e->attr.disabled = 1;
+	e->attr.exclude_kernel = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_idle = 1;
+}
+
+/* Mitigation states that get_sysfs_state() can report. */
+enum spectre_v2_state {
+	VULNERABLE = 0,
+	UNKNOWN = 1,		// Works with FAIL_IF()
+	NOT_AFFECTED,
+	BRANCH_SERIALISATION,
+	COUNT_CACHE_DISABLED,
+	COUNT_CACHE_FLUSH_SW,
+	COUNT_CACHE_FLUSH_HW,
+	BTB_FLUSH,
+};
+
+/*
+ * Read the spectre_v2 vulnerabilities file from sysfs, print it, and map
+ * its text to a spectre_v2_state.  Returns UNKNOWN if nothing matches.
+ */
+static enum spectre_v2_state get_sysfs_state(void)
+{
+	enum spectre_v2_state state = UNKNOWN;
+	char buf[256];
+	int len;
+
+	memset(buf, 0, sizeof(buf));
+	FAIL_IF(read_sysfs_file("devices/system/cpu/vulnerabilities/spectre_v2", buf, sizeof(buf)));
+
+	// Make sure it's NUL terminated
+	buf[sizeof(buf) - 1] = '\0';
+
+	// Trim the trailing newline
+	len = strlen(buf);
+	FAIL_IF(len < 1);
+	buf[len - 1] = '\0';
+
+	printf("sysfs reports: '%s'\n", buf);
+
+	// Order matters: strstr() matches substrings, so the longer
+	// "(hardware accelerated)" string must be tried before its prefix
+	if (strstr(buf, "Vulnerable"))
+		state = VULNERABLE;
+	else if (strstr(buf, "Not affected"))
+		state = NOT_AFFECTED;
+	else if (strstr(buf, "Indirect branch serialisation (kernel only)"))
+		state = BRANCH_SERIALISATION;
+	else if (strstr(buf, "Indirect branch cache disabled"))
+		state = COUNT_CACHE_DISABLED;
+	else if (strstr(buf, "Software count cache flush (hardware accelerated)"))
+		state = COUNT_CACHE_FLUSH_HW;
+	else if (strstr(buf, "Software count cache flush"))
+		state = COUNT_CACHE_FLUSH_SW;
+	else if (strstr(buf, "Branch predictor state flush"))
+		state = BTB_FLUSH;
+
+	return state;
+}
+
+#define PM_BR_PRED_CCACHE	0x040a4	// P8 + P9
+#define PM_BR_MPRED_CCACHE	0x040ac	// P8 + P9
+#define PM_BR_PRED_PCACHE	0x048a0	// P9 only
+#define PM_BR_MPRED_PCACHE	0x048b0	// P9 only
+
+#define SPRN_PVR 287
+
+/*
+ * Measure the userspace branch misprediction rate and check that it is
+ * consistent with the spectre v2 mitigation reported in sysfs.
+ *
+ * Returns 0 if they agree, 1 if they do not (or the state has no check),
+ * 4 for the guest-reports-vulnerable case described below, and -1 if the
+ * sysfs state could not be determined.
+ */
+int spectre_v2_test(void)
+{
+	enum spectre_v2_state state;
+	struct event events[4];
+	s64 miss_percent;
+	bool is_p9;
+
+	// The PMU events we use only work on Power8 or later
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+	state = get_sysfs_state();
+	if (state == UNKNOWN) {
+		printf("Error: couldn't determine spectre_v2 mitigation state?\n");
+		return -1;
+	}
+
+	memset(events, 0, sizeof(events));
+
+	setup_event(&events[0], PM_BR_PRED_CCACHE,  "PM_BR_PRED_CCACHE");
+	setup_event(&events[1], PM_BR_MPRED_CCACHE, "PM_BR_MPRED_CCACHE");
+	FAIL_IF(event_open(&events[0]));
+	FAIL_IF(event_open_with_group(&events[1], events[0].fd) == -1);
+
+	is_p9 = ((mfspr(SPRN_PVR) >>  16) & 0xFFFF) == 0x4e;
+
+	if (is_p9) {
+		// Count pattern cache too
+		setup_event(&events[2], PM_BR_PRED_PCACHE,  "PM_BR_PRED_PCACHE");
+		setup_event(&events[3], PM_BR_MPRED_PCACHE, "PM_BR_MPRED_PCACHE");
+
+		FAIL_IF(event_open_with_group(&events[2], events[0].fd) == -1);
+		FAIL_IF(event_open_with_group(&events[3], events[0].fd) == -1);
+	}
+
+	FAIL_IF(do_count_loop(events, is_p9, &miss_percent));
+
+	event_report_justified(&events[0], 18, 10);
+	event_report_justified(&events[1], 18, 10);
+	event_close(&events[0]);
+	event_close(&events[1]);
+
+	if (is_p9) {
+		event_report_justified(&events[2], 18, 10);
+		event_report_justified(&events[3], 18, 10);
+		event_close(&events[2]);
+		event_close(&events[3]);
+	}
+
+	printf("Miss percent %lld %%\n", miss_percent);
+
+	switch (state) {
+	case VULNERABLE:
+	case NOT_AFFECTED:
+	case COUNT_CACHE_FLUSH_SW:
+	case COUNT_CACHE_FLUSH_HW:
+		// These should all not affect userspace branch prediction
+		if (miss_percent > 15) {
+			printf("Branch misses > 15%% unexpected in this configuration!\n");
+			printf("Possible mis-match between reported & actual mitigation\n");
+			/*
+			 * Such a mismatch may be caused by a guest system
+			 * reporting as vulnerable when the host is mitigated.
+			 * Return skip code to avoid detecting this as an error.
+			 * We are not vulnerable and reporting otherwise, so
+			 * missing such a mismatch is safe.
+			 */
+			if (miss_percent > 95)
+				return 4;
+
+			return 1;
+		}
+		break;
+	case BRANCH_SERIALISATION:
+		// This seems to affect userspace branch prediction a bit?
+		if (miss_percent > 25) {
+			printf("Branch misses > 25%% unexpected in this configuration!\n");
+			printf("Possible mis-match between reported & actual mitigation\n");
+			return 1;
+		}
+		break;
+	case COUNT_CACHE_DISABLED:
+		if (miss_percent < 95) {
+			printf("Branch misses < 95%% unexpected in this configuration!\n");
+			printf("Possible mis-match between reported & actual mitigation\n");
+			return 1;
+		}
+		break;
+	case UNKNOWN:
+	case BTB_FLUSH:
+		printf("Not sure!\n");
+		return 1;
+	}
+
+	printf("OK - Measured branch prediction rates match reported spectre v2 mitigation.\n");
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(spectre_v2_test, "spectre_v2");
+}
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index dca5852..405b536 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
 signal
 signal_tm
 sigfuz
+sigreturn_vdso
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 113838f..d6ae546 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,10 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := signal signal_tm sigfuz
+TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
 
 CFLAGS += -maltivec
 $(OUTPUT)/signal_tm: CFLAGS += -mhtm
 $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64
 
+TEST_FILES := settings
+
 top_srcdir = ../../../../..
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/signal/settings b/tools/testing/selftests/powerpc/signal/settings
new file mode 100644
index 0000000..e7b9417
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
new file mode 100644
index 0000000..e397226
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test that a syscall does not get restarted twice, handled by trap_norestart()
+ *
+ * Based on Al's description, and a test for the bug fixed in this commit:
+ *
+ * commit 9a81c16b527528ad307843be5571111aa8d35a80
+ * Author: Al Viro <viro@zeniv.linux.org.uk>
+ * Date:   Mon Sep 20 21:48:57 2010 +0100
+ *
+ *  powerpc: fix double syscall restarts
+ *
+ *  Make sigreturn zero regs->trap, make do_signal() do the same on all
+ *  paths.  As it is, signal interrupting e.g. read() from fd 512 (==
+ *  ERESTARTSYS) with another signal getting unblocked when the first
+ *  handler finishes will lead to restart one insn earlier than it ought
+ *  to.  Same for multiple signals with in-kernel handlers interrupting
+ *  that sucker at the same time.  Same for multiple signals of any kind
+ *  interrupting that sucker on 64bit...
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+/* SIGUSR1 handler: sends SIGUSR2 to this process; see the comment inside. */
+static void SIGUSR1_handler(int sig)
+{
+	kill(getpid(), SIGUSR2);
+	/*
+	 * SIGUSR2 is blocked until the handler exits, at which point it will
+	 * be raised again and think there is a restart to be done because the
+	 * pending restarted syscall has 512 (ERESTARTSYS) in r3. The second
+	 * restart will retreat NIP another 4 bytes to fail case branch.
+	 */
+}
+
+/* Deliberately empty: the test only needs SIGUSR2 to be delivered. */
+static void SIGUSR2_handler(int sig)
+{
+}
+
+/*
+ * read(2) issued with a hand-written "sc" so that the instructions around
+ * the syscall are known.  Execution normally enters at "b 0f", which jumps
+ * over "b 1f" to the sc.  "b 1f" is the instruction directly before the
+ * sc, so it only runs if the restart address ends up one instruction too
+ * early; it loads -ENOANO as the result.
+ *
+ * Returns the value left in r3 (negated first when the bns after the sc is
+ * not taken), or -1 with errno set when that value is negative.
+ */
+static ssize_t raw_read(int fd, void *buf, size_t count)
+{
+	register long nr asm("r0") = __NR_read;
+	register long _fd asm("r3") = fd;
+	register void *_buf asm("r4") = buf;
+	register size_t _count asm("r5") = count;
+
+	asm volatile(
+"		b	0f		\n"
+"		b	1f		\n"
+"	0:	sc	0		\n"
+"		bns	2f		\n"
+"		neg	%0,%0		\n"
+"		b	2f		\n"
+"	1:				\n"
+"		li	%0,%4		\n"
+"	2:				\n"
+		: "+r"(_fd), "+r"(nr), "+r"(_buf), "+r"(_count)
+		: "i"(-ENOANO)
+		: "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "ctr", "cr0");
+
+	if (_fd < 0) {
+		errno = -_fd;
+		_fd = -1;
+	}
+
+	return _fd;
+}
+
+/* Payload the parent writes to the pipe; DLEN counts the trailing NUL. */
+#define DATA "test 123"
+#define DLEN (strlen(DATA)+1)
+
+/*
+ * Fork a child that blocks in raw_read() on a pipe fd numbered 512, signal
+ * it with SIGUSR1 (whose handler raises SIGUSR2), then write DATA to the pipe.
+ * The child fails if raw_read() reports ENOANO or the data doesn't match;
+ * the parent fails the test unless the child exits with EXIT_SUCCESS.
+ */
+int test_restart(void)
+{
+	int pipefd[2];
+	pid_t pid;
+	char buf[512];
+
+	if (pipe(pipefd) == -1) {
+		perror("pipe");
+		exit(EXIT_FAILURE);
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		perror("fork");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pid == 0) { /* Child reads from pipe */
+		struct sigaction act;
+		int fd;
+
+		memset(&act, 0, sizeof(act));
+		sigaddset(&act.sa_mask, SIGUSR2);
+		act.sa_handler = SIGUSR1_handler;
+		act.sa_flags = SA_RESTART;
+		if (sigaction(SIGUSR1, &act, NULL) == -1) {
+			perror("sigaction");
+			exit(EXIT_FAILURE);
+		}
+
+		memset(&act, 0, sizeof(act));
+		act.sa_handler = SIGUSR2_handler;
+		act.sa_flags = SA_RESTART;
+		if (sigaction(SIGUSR2, &act, NULL) == -1) {
+			perror("sigaction");
+			exit(EXIT_FAILURE);
+		}
+
+		/* Let's get ERESTARTSYS into r3 */
+		while ((fd = dup(pipefd[0])) != 512) {
+			if (fd == -1) {
+				perror("dup");
+				exit(EXIT_FAILURE);
+			}
+		}
+
+		if (raw_read(fd, buf, 512) == -1) {
+			if (errno == ENOANO) {
+				fprintf(stderr, "Double restart moved restart before sc instruction.\n");
+				_exit(EXIT_FAILURE);
+			}
+			perror("read");
+			exit(EXIT_FAILURE);
+		}
+
+		if (strncmp(buf, DATA, DLEN)) {
+			fprintf(stderr, "bad test string %s\n", buf);
+			exit(EXIT_FAILURE);
+		}
+
+		return 0;
+
+	} else {
+		int wstatus;
+
+		usleep(100000);		/* Hack to get reader waiting */
+		kill(pid, SIGUSR1);
+		usleep(100000);
+		if (write(pipefd[1], DATA, DLEN) != DLEN) {
+			perror("write");
+			exit(EXIT_FAILURE);
+		}
+		close(pipefd[0]);
+		close(pipefd[1]);
+		if (wait(&wstatus) == -1) {
+			perror("wait");
+			exit(EXIT_FAILURE);
+		}
+		if (!WIFEXITED(wstatus)) {
+			fprintf(stderr, "child exited abnormally\n");
+			exit(EXIT_FAILURE);
+		}
+
+		FAIL_IF(WEXITSTATUS(wstatus) != EXIT_SUCCESS);
+
+		return 0;
+	}
+}
+
+int main(void)
+{
+	test_harness_set_timeout(10);
+	return test_harness(test_restart, "sig sys restart");
+}
diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c
index dade00c..08f9afe 100644
--- a/tools/testing/selftests/powerpc/signal/sigfuz.c
+++ b/tools/testing/selftests/powerpc/signal/sigfuz.c
@@ -42,7 +42,7 @@
 #include "utils.h"
 
 /* Selftest defaults */
-#define COUNT_MAX	4000		/* Number of interactions */
+#define COUNT_MAX	600		/* Number of iterations */
 #define THREADS		16		/* Number of threads */
 
 /* Arguments options */
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c
new file mode 100644
index 0000000..e282fff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that we can take signals with and without the VDSO mapped, which trigger
+ * different paths in the signal handling code.
+ *
+ * See handle_rt_signal64() and setup_trampoline() in signal_64.c
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Ensure assert() is not compiled out
+#undef NDEBUG
+#include <assert.h>
+
+#include "utils.h"
+
+/*
+ * Find the first mapping in /proc/self/maps whose name contains needle.
+ *
+ * On a match, store the first and last address of the mapping in *low and
+ * *high and return 0.  Return -1 if no mapping matches or the file cannot
+ * be opened or parsed.
+ */
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+	unsigned long start, end;
+	static char buf[4096];
+	char name[128];
+	FILE *f;
+	int nfields;
+	int rc = -1;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), f)) {
+		nfields = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*x:%*x %*d %127s\n",
+				 &start, &end, name);
+		// Mappings with no name can't match the needle, skip them
+		if (nfields == 2)
+			continue;
+
+		if (nfields != 3) {
+			printf("sscanf errored\n");
+			break;
+		}
+
+		if (strstr(name, needle)) {
+			*low = start;
+			*high = end - 1;
+			rc = 0;
+			break;
+		}
+	}
+
+	fclose(f);
+
+	return rc;
+}
+
+static volatile sig_atomic_t took_signal = 0;
+
+/* Count how many times SIGUSR1 has been delivered. */
+static void sigusr1_handler(int sig)
+{
+	took_signal++;
+}
+
+/*
+ * Deliver SIGUSR1 to ourselves three times: with the VDSO at its original
+ * address, after moving it with mremap(), and after unmapping it entirely
+ * (with the stack made executable first).
+ */
+int test_sigreturn_vdso(void)
+{
+	unsigned long low, high, size;
+	struct sigaction act;
+	char *p;
+
+	act.sa_handler = sigusr1_handler;
+	act.sa_flags = 0;
+	sigemptyset(&act.sa_mask);
+
+	assert(sigaction(SIGUSR1, &act, NULL) == 0);
+
+	// Confirm the VDSO is mapped, and work out where it is
+	assert(search_proc_maps("[vdso]", &low, &high) == 0);
+	size = high - low + 1;
+	printf("VDSO is at 0x%lx-0x%lx (%lu bytes)\n", low, high, size);
+
+	kill(getpid(), SIGUSR1);
+	assert(took_signal == 1);
+	printf("Signal delivered OK with VDSO mapped\n");
+
+	// Remap the VDSO somewhere else
+	p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	assert(p != MAP_FAILED);
+	assert(mremap((void *)low, size, size, MREMAP_MAYMOVE|MREMAP_FIXED, p) != MAP_FAILED);
+	assert(search_proc_maps("[vdso]", &low, &high) == 0);
+	size = high - low + 1;
+	printf("VDSO moved to 0x%lx-0x%lx (%lu bytes)\n", low, high, size);
+
+	kill(getpid(), SIGUSR1);
+	assert(took_signal == 2);
+	printf("Signal delivered OK with VDSO moved\n");
+
+	assert(munmap((void *)low, size) == 0);
+	printf("Unmapped VDSO\n");
+
+	// Confirm the VDSO is not mapped anymore
+	assert(search_proc_maps("[vdso]", &low, &high) != 0);
+
+	// Make the stack executable
+	assert(search_proc_maps("[stack]", &low, &high) == 0);
+	size = high - low + 1;
+	assert(mprotect((void *)low, size, PROT_READ|PROT_WRITE|PROT_EXEC) == 0);
+	printf("Remapped the stack executable\n");
+
+	kill(getpid(), SIGUSR1);
+	assert(took_signal == 3);
+	printf("Signal delivered OK with VDSO unmapped\n");
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_sigreturn_vdso, "sigreturn_vdso");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore
index 31a17e0..b0dfc74 100644
--- a/tools/testing/selftests/powerpc/stringloops/.gitignore
+++ b/tools/testing/selftests/powerpc/stringloops/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 memcmp_64
 memcmp_32
 strlen
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 7fc0623..9c39f55 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -8,7 +8,7 @@
 
 TEST_GEN_PROGS := memcmp_64 strlen
 
-$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: memcmp.c ../utils.c
 $(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
 
 ifeq ($(build_32bit),1)
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index b1fa754..cb2f188 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,7 +2,9 @@
 #include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/mman.h>
 #include <time.h>
+
 #include "utils.h"
 
 #define SIZE 256
@@ -13,6 +15,9 @@
 #define LARGE_MAX_OFFSET 32
 #define LARGE_SIZE_START 4096
 
+/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */
+#define MAP_SIZE (64 * 1024)
+
 #define MAX_OFFSET_DIFF_S1_S2 48
 
 int vmx_count;
@@ -68,25 +73,25 @@
 
 static int testcase(bool islarge)
 {
-	char *s1;
-	char *s2;
-	unsigned long i;
+	unsigned long i, comp_size, alloc_size;
+	char *p, *s1, *s2;
+	int iterations;
 
-	unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
-	unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
-	int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+	comp_size = (islarge ? LARGE_SIZE : SIZE);
+	alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+	iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
 
-	s1 = memalign(128, alloc_size);
-	if (!s1) {
-		perror("memalign");
-		exit(1);
-	}
+	p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	FAIL_IF(p == MAP_FAILED);
 
-	s2 = memalign(128, alloc_size);
-	if (!s2) {
-		perror("memalign");
-		exit(1);
-	}
+	/* Put s1/s2 at the end of a page */
+	s1 = p + MAP_SIZE - alloc_size;
+	s2 = p + 3 * MAP_SIZE - alloc_size;
+
+	/* And unmap the subsequent page to force a fault if we overread */
+	munmap(p + MAP_SIZE, MAP_SIZE);
+	munmap(p + 3 * MAP_SIZE, MAP_SIZE);
 
 	srandom(time(0));
 
@@ -147,6 +152,11 @@
 
 static int testcases(void)
 {
+#ifdef __powerpc64__
+	// vcmpequd used in memcmp_64.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+#endif
+
 	testcase(0);
 	testcase(1);
 	return 0;
diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore
index 89e762e..30e962c 100644
--- a/tools/testing/selftests/powerpc/switch_endian/.gitignore
+++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 switch_endian_test
 check-reversed.S
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
index cc49304..7887f78 100644
--- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -3,9 +3,13 @@
 
 	.data
 	.balign 8
-message:
+success_message:
 	.ascii "success: switch_endian_test\n\0"
 
+	.balign 8
+failure_message:
+	.ascii "failure: switch_endian_test\n\0"
+
 	.section ".toc"
 	.balign 8
 pattern:
@@ -64,6 +68,9 @@
 	li r0, __NR_switch_endian
 	sc
 
+	tdi   0, 0, 0x48	// b +8 if the endian was switched
+	b     .Lfail	  	// exit if endian didn't switch
+
 #include "check-reversed.S"
 
 	/* Flip back, r0 already has the switch syscall number */
@@ -71,12 +78,20 @@
 
 #include "check.S"
 
+	ld	r4, success_message@got(%r2)
+	li	r5, 28	// strlen(success_message)
+	li	r14, 0	// exit status
+.Lout:
 	li	r0, __NR_write
 	li	r3, 1	/* stdout */
-	ld	r4, message@got(%r2)
-	li	r5, 28	/* strlen(message3) */
 	sc
 	li      r0, __NR_exit
-	li	r3, 0
+	mr	r3, r14
 	sc
 	b       .
+
+.Lfail:
+	ld	r4, failure_message@got(%r2)
+	li	r5, 28	// strlen(failure_message)
+	li	r14, 1
+	b	.Lout
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
index f0f3fcc..b00cab2 100644
--- a/tools/testing/selftests/powerpc/syscalls/.gitignore
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ipc_unmuxed
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index 01b2277..b63f845 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed rtas_filter
 
 CFLAGS += -I../../../../../usr/include
 
diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
new file mode 100644
index 0000000..03b487f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2005-2020 IBM Corporation.
+ *
+ * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/)
+ */
+
+#include <byteswap.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "utils.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x)		bswap_32(x)
+#define be32_to_cpu(x)		bswap_32(x)
+#else
+#define cpu_to_be32(x)		(x)
+#define be32_to_cpu(x)		(x)
+#endif
+
+#define RTAS_IO_ASSERT	-1098	/* Unexpected I/O Error */
+#define RTAS_UNKNOWN_OP -1099	/* No Firmware Implementation of Function */
+#define BLOCK_SIZE 4096
+#define PAGE_SIZE 4096
+#define MAX_PAGES 64
+
+static const char *ofdt_rtas_path = "/proc/device-tree/rtas";
+
+// NOTE(review): this declares uint32_t in terms of __be32, so __be32 must
+// already come from an earlier header - confirm this is the intended way round
+typedef __be32 uint32_t;
+/* Argument block handed to the rtas syscall by rtas_call(). */
+struct rtas_args {
+	__be32 token;
+	__be32 nargs;
+	__be32 nret;
+	__be32 args[16];
+	__be32 *rets;	  /* Pointer to return values in args[]. */
+};
+
+/* Address and size parsed from the rmo_buffer file. */
+struct region {
+	uint64_t addr;
+	uint32_t size;
+	struct region *next;
+};
+
+/*
+ * Read fd to end of file into a newly allocated buffer.
+ *
+ * The buffer is returned in *buf and is always NUL terminated; the number of
+ * bytes read (not counting the terminator) is stored in *len when len is
+ * not NULL.  Returns 0 on success, -ENOMEM if the buffer could not be
+ * grown (*buf is then NULL) or -EIO if read() failed.  In every case the
+ * caller owns *buf and should free() it.
+ */
+int read_entire_file(int fd, char **buf, size_t *len)
+{
+	size_t buf_size = 0;
+	size_t off = 0;
+	char *tmp;
+	int rc;
+
+	*buf = NULL;
+	do {
+		buf_size += BLOCK_SIZE;
+		// realloc(NULL, n) acts as malloc(n), so one call covers both
+		tmp = realloc(*buf, buf_size);
+		if (tmp == NULL) {
+			// Don't leak the old buffer when growing fails
+			free(*buf);
+			*buf = NULL;
+			return -ENOMEM;
+		}
+		*buf = tmp;
+
+		rc = read(fd, *buf + off, BLOCK_SIZE);
+		if (rc < 0)
+			return -EIO;
+
+		off += rc;
+	} while (rc == BLOCK_SIZE);
+
+	// The loop ends on a short read, so off < buf_size and this fits
+	(*buf)[off] = '\0';
+
+	if (len)
+		*len = off;
+
+	return 0;
+}
+
+/*
+ * Open "<prop_path>/<prop_name>" read-only and store the descriptor in *fd.
+ * Returns 0 on success or a negative errno value.
+ */
+static int open_prop_file(const char *prop_path, const char *prop_name, int *fd)
+{
+	char *path;
+	int len, rc;
+
+	/* allocate enough for two strings, a slash and trailing NUL */
+	len = strlen(prop_path) + strlen(prop_name) + 1 + 1;
+	path = malloc(len);
+	if (path == NULL)
+		return -ENOMEM;
+
+	snprintf(path, len, "%s/%s", prop_path, prop_name);
+
+	*fd = open(path, O_RDONLY);
+	// Capture errno before free() has a chance to change it
+	rc = (*fd < 0) ? -errno : 0;
+	free(path);
+
+	return rc;
+}
+
+/*
+ * Read the whole of "<prop_path>/<prop_name>" into a newly allocated buffer
+ * returned in *prop_val, with its length in *prop_len.  Returns 0 or a
+ * negative error; *prop_val is left untouched if the file can't be opened.
+ */
+static int get_property(const char *prop_path, const char *prop_name,
+			char **prop_val, size_t *prop_len)
+{
+	int rc, fd;
+
+	rc = open_prop_file(prop_path, prop_name, &fd);
+	if (rc)
+		return rc;
+
+	rc = read_entire_file(fd, prop_val, prop_len);
+	close(fd);
+
+	return rc;
+}
+
+/*
+ * Look up the token for the RTAS call 'call_name' under ofdt_rtas_path.
+ * Returns the token, or RTAS_UNKNOWN_OP if the property can't be read or is
+ * too short to hold one.
+ */
+int rtas_token(const char *call_name)
+{
+	char *prop_buf = NULL;
+	__be32 token;
+	size_t len;
+	int rc;
+
+	rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len);
+	if (rc < 0 || len < sizeof(token)) {
+		rc = RTAS_UNKNOWN_OP;
+		goto err;
+	}
+
+	memcpy(&token, prop_buf, sizeof(token));
+	rc = be32_to_cpu(token);
+
+err:
+	free(prop_buf);
+	return rc;
+}
+
+/*
+ * Parse the address and size from /proc/ppc64/rtas/rmo_buffer into kregion.
+ * Returns 0 on success, RTAS_IO_ASSERT if the file can't be opened or its
+ * contents are unusable, or the error from read_entire_file().
+ */
+static int read_kregion_bounds(struct region *kregion)
+{
+	char *buf;
+	int fd;
+	int rc;
+
+	fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY);
+	if (fd < 0) {
+		printf("Could not open rmo_buffer file\n");
+		return RTAS_IO_ASSERT;
+	}
+
+	rc = read_entire_file(fd, &buf, NULL);
+	close(fd);
+	if (rc) {
+		free(buf);
+		return rc;
+	}
+
+	rc = sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size);
+	free(buf);
+	if (rc != 2) {
+		printf("Could not parse rmo_buffer file\n");
+		return RTAS_IO_ASSERT;
+	}
+
+	if (!(kregion->size && kregion->addr) ||
+	    (kregion->size > (PAGE_SIZE * MAX_PAGES))) {
+		printf("Unexpected kregion bounds\n");
+		return RTAS_IO_ASSERT;
+	}
+
+	return 0;
+}
+
+/*
+ * Invoke the RTAS call 'name' through the rtas syscall.
+ *
+ * The variadic part is nargs 32-bit input values (stored into the argument
+ * block as given, so callers pass them already in big-endian form) followed
+ * by nrets "__be32 *" pointers that receive the return words.  Only the
+ * first return word is byte-swapped to CPU order.
+ *
+ * Returns 0 on success, RTAS_UNKNOWN_OP if the call has no token, or
+ * -errno if the syscall failed.
+ */
+static int rtas_call(const char *name, int nargs,
+		     int nrets, ...)
+{
+	struct rtas_args args;
+	__be32 *rets[16];
+	int i, rc, token;
+	va_list ap;
+
+	va_start(ap, nrets);
+
+	token = rtas_token(name);
+	if (token == RTAS_UNKNOWN_OP) {
+		// We don't care if the call doesn't exist
+		printf("call '%s' not available, skipping...", name);
+		rc = RTAS_UNKNOWN_OP;
+		goto err;
+	}
+
+	args.token = cpu_to_be32(token);
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nrets);
+
+	// Fetch each argument with the type the callers actually pass
+	for (i = 0; i < nargs; i++)
+		args.args[i] = (__be32) va_arg(ap, unsigned int);
+
+	for (i = 0; i < nrets; i++)
+		rets[i] = va_arg(ap, __be32 *);
+
+	rc = syscall(__NR_rtas, &args);
+	if (rc) {
+		rc = -errno;
+		goto err;
+	}
+
+	if (nrets) {
+		*(rets[0]) = be32_to_cpu(args.args[nargs]);
+
+		for (i = 1; i < nrets; i++) {
+			*(rets[i]) = args.args[nargs + i];
+		}
+	}
+
+err:
+	va_end(ap);
+	return rc;
+}
+
+/*
+ * Exercise the rtas syscall with one permitted call, one prohibited call,
+ * and buffers inside and outside the RMO region.  A call that is not
+ * available (RTAS_UNKNOWN_OP) is accepted in every case.
+ */
+static int test(void)
+{
+	struct region rmo_region;
+	uint32_t rmo_start;
+	uint32_t rmo_end;
+	__be32 rets[1];
+	int rc;
+
+	// Test a legitimate harmless call
+	// Expected: call succeeds
+	printf("Test a permitted call, no parameters... ");
+	rc = rtas_call("get-time-of-day", 0, 1, rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+	// Test a prohibited call
+	// Expected: call returns -EINVAL
+	printf("Test a prohibited call... ");
+	rc = rtas_call("nvram-fetch", 0, 1, rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	// Get RMO
+	rc = read_kregion_bounds(&rmo_region);
+	if (rc) {
+		printf("Couldn't read RMO region bounds, skipping remaining cases\n");
+		return 0;
+	}
+	rmo_start = rmo_region.addr;
+	rmo_end = rmo_start + rmo_region.size - 1;
+	printf("RMO range: %08x - %08x\n", rmo_start, rmo_end);
+
+	// Test a permitted call, user-supplied size, buffer inside RMO
+	// Expected: call succeeds
+	printf("Test a permitted call, user-supplied size, buffer inside RMO... ");
+	rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+		       cpu_to_be32(rmo_end - rmo_start + 1), rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+	// Test a permitted call, user-supplied size, buffer start outside RMO
+	// Expected: call returns -EINVAL
+	printf("Test a permitted call, user-supplied size, buffer start outside RMO... ");
+	rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1),
+		       cpu_to_be32(4000), rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	// Test a permitted call, user-supplied size, buffer end outside RMO
+	// Expected: call returns -EINVAL
+	printf("Test a permitted call, user-supplied size, buffer end outside RMO... ");
+	rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+		       cpu_to_be32(rmo_end - rmo_start + 2), rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	// Test a permitted call, fixed size, buffer end outside RMO
+	// Expected: call returns -EINVAL
+	printf("Test a permitted call, fixed size, buffer end outside RMO... ");
+	rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test, "rtas_filter");
+}
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 98f2708..d8900a0 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 tm-resched-dscr
 tm-syscall
 tm-signal-msr-resv
@@ -13,6 +14,7 @@
 tm-signal-context-chk-vsx
 tm-signal-context-force-tm
 tm-signal-sigreturn-nt
+tm-signal-pagefault
 tm-vmx-unavail
 tm-unavailable
 tm-trap
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index b15a1a3..5881e97 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -5,7 +5,9 @@
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
 	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
 	$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \
-	tm-signal-context-force-tm tm-poison
+	tm-signal-context-force-tm tm-poison tm-signal-pagefault
+
+TEST_FILES := settings
 
 top_srcdir = ../../../../..
 include ../../lib.mk
@@ -22,6 +24,8 @@
 $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
 $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
 $(OUTPUT)/tm-signal-context-force-tm: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-signal-pagefault: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-poison: CFLAGS += -m64
 
 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/settings b/tools/testing/selftests/powerpc/tm/settings
new file mode 100644
index 0000000..e7b9417
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 9775584..29e5f26 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -26,7 +26,7 @@
 
 int tm_poison_test(void)
 {
-	int pid;
+	int cpu, pid;
 	cpu_set_t cpuset;
 	uint64_t poison = 0xdeadbeefc0dec0fe;
 	uint64_t unknown = 0;
@@ -35,10 +35,13 @@
 
 	SKIP_IF(!have_htm());
 
-	/* Attach both Child and Parent to CPU 0 */
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+
+	// Attach both Child and Parent to the same CPU
 	CPU_ZERO(&cpuset);
-	CPU_SET(0, &cpuset);
-	sched_setaffinity(0, sizeof(cpuset), &cpuset);
+	CPU_SET(cpu, &cpuset);
+	FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0);
 
 	pid = fork();
 	if (!pid) {
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c
index 3171762..421cb08 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c
@@ -42,9 +42,10 @@
 #endif
 
 /* Setting contexts because the test will crash and we want to recover */
-ucontext_t init_context, main_context;
+ucontext_t init_context;
 
-static int count, first_time;
+/* count is changed in the signal handler, so it must be volatile */
+static volatile int count;
 
 void usr_signal_handler(int signo, siginfo_t *si, void *uc)
 {
@@ -98,11 +99,6 @@
 
 void seg_signal_handler(int signo, siginfo_t *si, void *uc)
 {
-	if (count == COUNT_MAX) {
-		/* Return to tm_signal_force_msr() and exit */
-		setcontext(&main_context);
-	}
-
 	count++;
 
 	/* Reexecute the test */
@@ -126,37 +122,41 @@
 	 */
 	getcontext(&init_context);
 
-	/* Allocated an alternative signal stack area */
-	ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
-			MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-	ss.ss_size = SIGSTKSZ;
-	ss.ss_flags = 0;
+	while (count < COUNT_MAX) {
+		/* Allocate an alternative signal stack area */
+		ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+		ss.ss_size = SIGSTKSZ;
+		ss.ss_flags = 0;
 
-	if (ss.ss_sp == (void *)-1) {
-		perror("mmap error\n");
-		exit(-1);
+		if (ss.ss_sp == (void *)-1) {
+			perror("mmap error\n");
+			exit(-1);
+		}
+
+		/* Force the allocation through a page fault */
+		if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) {
+			perror("madvise\n");
+			exit(-1);
+		}
+
+		/*
+		 * Setting an alternative stack to generate a page fault when
+		 * the signal is raised.
+		 */
+		if (sigaltstack(&ss, NULL)) {
+			perror("sigaltstack\n");
+			exit(-1);
+		}
+
+		/* The signal handler will enable MSR_TS */
+		sigaction(SIGUSR1, &usr_sa, NULL);
+		/* If it does not crash, it might segfault, avoid it to retest */
+		sigaction(SIGSEGV, &seg_sa, NULL);
+
+		raise(SIGUSR1);
+		count++;
 	}
-
-	/* Force the allocation through a page fault */
-	if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) {
-		perror("madvise\n");
-		exit(-1);
-	}
-
-	/* Setting an alternative stack to generate a page fault when
-	 * the signal is raised.
-	 */
-	if (sigaltstack(&ss, NULL)) {
-		perror("sigaltstack\n");
-		exit(-1);
-	}
-
-	/* The signal handler will enable MSR_TS */
-	sigaction(SIGUSR1, &usr_sa, NULL);
-	/* If it does not crash, it will segfault, avoid it to retest */
-	sigaction(SIGSEGV, &seg_sa, NULL);
-
-	raise(SIGUSR1);
 }
 
 int tm_signal_context_force_tm(void)
@@ -169,11 +169,7 @@
 	 */
 	SKIP_IF(!is_ppc64le());
 
-	/* Will get back here after COUNT_MAX interactions */
-	getcontext(&main_context);
-
-	if (!first_time++)
-		tm_trap_test();
+	tm_trap_test();
 
 	return EXIT_SUCCESS;
 }
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
new file mode 100644
index 0000000..5908bc6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
+ *
+ * This test starts a transaction and triggers a signal, forcing a pagefault to
+ * happen when the kernel signal handling code touches the user signal stack.
+ *
+ * In order to avoid pre-faulting the signal stack memory and to force the
+ * pagefault to happen precisely in the kernel signal handling code, the
+ * pagefault handling is done in userspace using the userfaultfd facility.
+ *
+ * Further pagefaults are triggered by crafting the signal handler's ucontext
+ * to point to additional memory regions managed by the userfaultfd, so using
+ * the same mechanism used to avoid pre-faulting the signal stack memory.
+ *
+ * On failure (bug is present) kernel crashes or never returns control back to
+ * userspace. If the bug is not present, the test completes almost immediately.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/userfaultfd.h>
+#include <poll.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <signal.h>
+#include <errno.h>
+
+#include "tm.h"
+
+
+#define UF_MEM_SIZE 655360	/* 10 x 64k pages */
+
+/* Memory handled by userfaultfd */
+static char *uf_mem;
+static size_t uf_mem_offset = 0;
+
+/*
+ * Data that will be copied into the faulting pages (instead of zero-filled
+ * pages). This is used to make the test more reliable and avoid segfaulting
+ * when we return from the signal handler. Since we are making the signal
+ * handler's ucontext point to newly allocated memory, when that memory is
+ * paged-in it will contain the expected content.
+ */
+static char backing_mem[UF_MEM_SIZE];
+
+static size_t pagesize;
+
+/*
+ * Return a chunk of at least 'size' bytes of memory that will be handled by
+ * userfaultfd; the process exits if that would overrun UF_MEM_SIZE. If
+ * 'backing_data' is not NULL, 'size' bytes of it are saved to 'backing_mem'
+ * and then copied into the faulting pages when the page fault is handled.
+ */
+void *get_uf_mem(size_t size, void *backing_data)
+{
+	void *ret;
+
+	if (uf_mem_offset + size > UF_MEM_SIZE) {
+		fprintf(stderr, "Requesting more uf_mem than expected!\n");
+		exit(EXIT_FAILURE);
+	}
+	/* The offset is re-aligned below, so each chunk starts on a new page */
+	ret = &uf_mem[uf_mem_offset];
+
+	/* Save the data that will be copied into the faulting page */
+	if (backing_data != NULL)
+		memcpy(&backing_mem[uf_mem_offset], backing_data, size);
+
+	/* Reserve the requested amount of uf_mem */
+	uf_mem_offset += size;
+	/* Keep uf_mem_offset aligned to the page size (round up) */
+	uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
+
+	return ret;
+}
+
+void *fault_handler_thread(void *arg)
+{
+	struct uffd_msg msg;	/* Data read from userfaultfd */
+	long uffd;		/* userfaultfd file descriptor */
+	struct uffdio_copy uffdio_copy;
+	struct pollfd pollfd;
+	ssize_t nread, offset;
+
+	uffd = (long) arg;
+	/* Serve page-fault events forever; any error exits the process */
+	for (;;) {
+		pollfd.fd = uffd;
+		pollfd.events = POLLIN;
+		if (poll(&pollfd, 1, -1) == -1) {
+			perror("poll() failed");
+			exit(EXIT_FAILURE);
+		}
+		/* poll() returned without error: fetch one event message */
+		nread = read(uffd, &msg, sizeof(msg));
+		if (nread == 0) {
+			fprintf(stderr, "read(): EOF on userfaultfd\n");
+			exit(EXIT_FAILURE);
+		}
+
+		if (nread == -1) {
+			perror("read() failed");
+			exit(EXIT_FAILURE);
+		}
+
+		/* We expect only one kind of event */
+		if (msg.event != UFFD_EVENT_PAGEFAULT) {
+			fprintf(stderr, "Unexpected event on userfaultfd\n");
+			exit(EXIT_FAILURE);
+		}
+
+		/*
+		 * We need to handle page faults in units of pages(!).
+		 * So, round faulting address down to page boundary.
+		 */
+		uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
+		/* Fill the page from the same offset within backing_mem */
+		offset = (char *) uffdio_copy.dst - uf_mem;
+		uffdio_copy.src = (unsigned long) &backing_mem[offset];
+
+		uffdio_copy.len = pagesize;
+		uffdio_copy.mode = 0;
+		uffdio_copy.copy = 0;
+		if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
+			perror("ioctl-UFFDIO_COPY failed");
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+void setup_uf_mem(void)
+{
+	long uffd;		/* userfaultfd file descriptor */
+	pthread_t thr;
+	struct uffdio_api uffdio_api;
+	struct uffdio_register uffdio_register;
+	int ret;
+
+	pagesize = sysconf(_SC_PAGE_SIZE);
+
+	/* Create the userfaultfd object and enable it via UFFDIO_API */
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd == -1) {
+		perror("userfaultfd() failed");
+		exit(EXIT_FAILURE);
+	}
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
+		perror("ioctl-UFFDIO_API failed");
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * Create a private anonymous mapping. Nothing is allocated yet: the
+	 * first touch of each page raises a userfaultfd event (once the range
+	 * is registered below) and the handler thread supplies its content.
+	 */
+	uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (uf_mem == MAP_FAILED) {
+		perror("mmap() failed");
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * Register the memory range of the mapping we've just mapped to be
+	 * handled by the userfaultfd object. In 'mode' we request to track
+	 * missing pages (i.e. pages that have not yet been faulted-in).
+	 */
+	uffdio_register.range.start = (unsigned long) uf_mem;
+	uffdio_register.range.len = UF_MEM_SIZE;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
+		perror("ioctl-UFFDIO_REGISTER");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Start fault_handler_thread to serve the events; it is never joined */
+	ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
+	if (ret != 0) {
+		fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * SIGTRAP handler. Assumption: the signal was delivered while userspace was in
+ * transactional or suspended state, i.e. uc->uc_link != NULL.
+ */
+void signal_handler(int signo, siginfo_t *si, void *uc)
+{
+	ucontext_t *ucp = uc;
+
+	/* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
+	ucp->uc_link->uc_mcontext.regs->nip += 4;
+	/* Repoint both v_regs areas at not-yet-faulted userfaultfd memory */
+	ucp->uc_mcontext.v_regs =
+		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
+	/* NOTE(review): only sizeof(elf_vrreg_t) bytes are backed -- confirm */
+	ucp->uc_link->uc_mcontext.v_regs =
+		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
+	/* Likewise relocate the uc_link context itself, contents preserved */
+	ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
+}
+
+bool have_userfaultfd(void)
+{
+	long rc;
+
+	errno = 0;
+	rc = syscall(__NR_userfaultfd, -1);	/* probe call, flags = -1 */
+	/* Only ENOSYS means the syscall is missing; anything else counts */
+	return rc == 0 || errno != ENOSYS;
+}
+
+int tm_signal_pagefault(void)
+{
+	struct sigaction sa;
+	stack_t ss;
+
+	SKIP_IF(!have_htm());
+	SKIP_IF(!have_userfaultfd());
+
+	setup_uf_mem();
+
+	/*
+	 * Put the alternative stack in uf_mem so that delivering the signal
+	 * page-faults on it; fault_handler_thread resolves it via userfaultfd.
+	 */
+	ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
+	ss.ss_size = SIGSTKSZ;
+	ss.ss_flags = 0;
+	if (sigaltstack(&ss, NULL) == -1) {
+		perror("sigaltstack() failed");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&sa, 0, sizeof(sa));	/* sa_mask must not be stack garbage */
+	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+	sa.sa_sigaction = signal_handler;
+	if (sigaction(SIGTRAP, &sa, NULL) == -1) {
+		perror("sigaction() failed");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Trigger a SIGTRAP in transactional state */
+	asm __volatile__(
+			"tbegin.;"
+			"beq    1f;"
+			"trap;"
+			"1: ;"
+			: : : "memory");
+
+	/* Trigger a SIGTRAP in suspended state */
+	asm __volatile__(
+			"tbegin.;"
+			"beq    1f;"
+			"tsuspend.;"
+			"trap;"
+			"tresume.;"
+			"1: ;"
+			: : : "memory");
+
+	return EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+	/*
+	 * Depending on kernel config, the TM Bad Thing might not result in a
+	 * crash, instead the kernel never returns control back to userspace, so
+	 * set a tight timeout (presumably in seconds -- TODO confirm). If the
+	 * test passes it completes almost immediately.
+	 */
+	test_harness_set_timeout(2);
+	return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 17becf3..794d574 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -33,19 +33,13 @@
 #include "utils.h"
 #include "tm.h"
 
-int	num_loops	= 10000;
+int	num_loops	= 1000000;
 int	passed = 1;
 
 void tfiar_tfhar(void *in)
 {
-	int i, cpu;
 	unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
-	cpu_set_t cpuset;
-
-	CPU_ZERO(&cpuset);
-	cpu = (unsigned long)in >> 1;
-	CPU_SET(cpu, &cpuset);
-	sched_setaffinity(0, sizeof(cpuset), &cpuset);
+	int i;
 
 	/* TFIAR: Last bit has to be high so userspace can read register */
 	tfiar = ((unsigned long)in) + 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 601f0c1..c75960a 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -247,8 +247,7 @@
 int tm_trap_test(void)
 {
 	uint16_t k = 1;
-
-	int rc;
+	int cpu, rc;
 
 	pthread_attr_t attr;
 	cpu_set_t cpuset;
@@ -267,9 +266,12 @@
 	usr1_sa.sa_sigaction = usr1_signal_handler;
 	sigaction(SIGUSR1, &usr1_sa, NULL);
 
-	/* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+
+	// Set only one CPU in the mask. Both threads will be bound to that CPU.
 	CPU_ZERO(&cpuset);
-	CPU_SET(0, &cpuset);
+	CPU_SET(cpu, &cpuset);
 
 	/* Init pthread attribute */
 	rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 2ca2fcc..a1348a5 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -338,16 +338,19 @@
 
 int tm_unavailable_test(void)
 {
-	int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+	int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
 	pthread_t t1;
 	pthread_attr_t attr;
 	cpu_set_t cpuset;
 
 	SKIP_IF(!have_htm());
 
-	/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+
+	// Set only one CPU in the mask. Both threads will be bound to that CPU.
 	CPU_ZERO(&cpuset);
-	CPU_SET(0, &cpuset);
+	CPU_SET(cpu, &cpuset);
 
 	/* Init pthread attribute. */
 	rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c402464..c5a1e5c 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -6,9 +6,8 @@
 #ifndef _SELFTESTS_POWERPC_TM_TM_H
 #define _SELFTESTS_POWERPC_TM_TM_H
 
-#include <asm/tm.h>
-#include <asm/cputable.h>
 #include <stdbool.h>
+#include <asm/tm.h>
 
 #include "utils.h"
 
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 176102e..1f36ee1 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,7 +10,6 @@
 #include <fcntl.h>
 #include <link.h>
 #include <sched.h>
-#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -140,6 +139,26 @@
 	return strcmp(uts.machine, "ppc64le") == 0;
 }
 
+int read_sysfs_file(char *fpath, char *result, size_t result_size)
+{
+	char path[PATH_MAX] = "/sys/";
+	int rc = -1, fd;
+
+	strncat(path, fpath, PATH_MAX - strlen(path) - 1);
+
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return rc;
+	/* Single read(); 'result' is not NUL-terminated by this function */
+	rc = read(fd, result, result_size);
+
+	close(fd);
+
+	if (rc < 0)
+		return rc;
+	/* Success is reported as 0; the number of bytes read is dropped */
+	return 0;
+}
+
 int read_debugfs_file(char *debugfs_file, int *result)
 {
 	int rc = -1, fd;
@@ -253,36 +272,32 @@
 	return 0;
 }
 
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
+int using_hash_mmu(bool *using_hash)
 {
-	static int warned = 0;
-	ucontext_t *ctx = (ucontext_t *)unused;
-	unsigned long *pc = &UCONTEXT_NIA(ctx);
+	char line[128];
+	FILE *f;
+	int rc;
 
-	/* mtspr 3,RS to check for move to DSCR below */
-	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
-		if (!warned++)
-			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
-		*pc += 4;
-	} else {
-		printf("SIGILL at %p\n", pc);
-		abort();
-	}
-}
+	f = fopen("/proc/cpuinfo", "r");
+	FAIL_IF(!f);
 
-void set_dscr(unsigned long val)
-{
-	static int init = 0;
-	struct sigaction sa;
+	rc = 0;
+	while (fgets(line, sizeof(line), f) != NULL) {
+		if (!strcmp(line, "MMU		: Hash\n") ||
+		    !strcmp(line, "platform	: Cell\n") ||
+		    !strcmp(line, "platform	: PowerMac\n")) {
+			*using_hash = true;
+			goto out;
+		}
 
-	if (!init) {
-		memset(&sa, 0, sizeof(sa));
-		sa.sa_sigaction = sigill_handler;
-		sa.sa_flags = SA_SIGINFO;
-		if (sigaction(SIGILL, &sa, NULL))
-			perror("sigill_handler");
-		init = 1;
+		if (strcmp(line, "MMU		: Radix\n") == 0) {
+			*using_hash = false;
+			goto out;
+		}
 	}
 
-	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+	rc = -1;
+out:
+	fclose(f);
+	return rc;
 }
diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore
index 7c04395..b744aed 100644
--- a/tools/testing/selftests/powerpc/vphn/.gitignore
+++ b/tools/testing/selftests/powerpc/vphn/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 test-vphn