feat(sve): add helper routines to read, write, compare SVE registers

Add helper routines to read, write, write_rand and compare SVE
Z, P, FFR registers.

These helper routines can be called by test cases running in the NS-EL2,
R-EL1 and S-EL1 payloads. The caller must configure the SVE vector length
beforehand and must provide the memory that the SVE registers are read
into or written from.

Signed-off-by: Arunachalam Ganapathy <arunachalam.ganapathy@arm.com>
Change-Id: I3fa064c76a498ee2348d92cba2544a6e50331e15
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index ed5678e..4458001 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -13,38 +13,66 @@
 #define fill_sve_helper(num) "ldr z"#num", [%0, #"#num", MUL VL];"
 #define read_sve_helper(num) "str z"#num", [%0, #"#num", MUL VL];"
 
+#define fill_sve_p_helper(num) "ldr p"#num", [%0, #"#num", MUL VL];"
+#define read_sve_p_helper(num) "str p"#num", [%0, #"#num", MUL VL];"
+
 /*
  * Max. vector length permitted by the architecture:
  * SVE:	 2048 bits = 256 bytes
  */
-#define SVE_VECTOR_LEN_BYTES		256
-#define SVE_NUM_VECTORS			32
+#define SVE_VECTOR_LEN_BYTES		(256U)
+#define SVE_NUM_VECTORS			(32U)
+
+/* Max size of one predicate register is 1/8 of Z register */
+#define SVE_P_REG_LEN_BYTES		(SVE_VECTOR_LEN_BYTES / 8U)
+#define SVE_NUM_P_REGS			(16U)
+
+/* Max size of one FFR register is 1/8 of Z register */
+#define SVE_FFR_REG_LEN_BYTES		(SVE_VECTOR_LEN_BYTES / 8U)
+#define SVE_NUM_FFR_REGS		(1U)
 
 #define SVE_VQ_ARCH_MIN			(0U)
-#define SVE_VQ_ARCH_MAX			((1 << ZCR_EL2_SVE_VL_WIDTH) - 1)
+#define SVE_VQ_ARCH_MAX			((1U << ZCR_EL2_SVE_VL_WIDTH) - 1U)
 
 /* convert SVE VL in bytes to VQ */
-#define SVE_VL_TO_VQ(vl_bytes)		(((vl_bytes) >> 4U) - 1)
+#define SVE_VL_TO_VQ(vl_bytes)		(((vl_bytes) >> 4U) - 1U)
 
 /* convert SVE VQ to bits */
 #define SVE_VQ_TO_BITS(vq)		(((vq) + 1U) << 7U)
 
 /* convert SVE VQ to bytes */
-#define SVE_VQ_TO_BYTES(vq)		(SVE_VQ_TO_BITS(vq) / 8)
+#define SVE_VQ_TO_BYTES(vq)		(SVE_VQ_TO_BITS(vq) / 8U)
 
 /* get a random SVE VQ b/w 0 to SVE_VQ_ARCH_MAX */
-#define SVE_GET_RANDOM_VQ		(rand() % (SVE_VQ_ARCH_MAX + 1))
+#define SVE_GET_RANDOM_VQ		(rand() % (SVE_VQ_ARCH_MAX + 1U))
 
 #ifndef __ASSEMBLY__
 
 typedef uint8_t sve_z_regs_t[SVE_NUM_VECTORS * SVE_VECTOR_LEN_BYTES]
 		__aligned(16);
+typedef uint8_t sve_p_regs_t[SVE_NUM_P_REGS * SVE_P_REG_LEN_BYTES]
+		__aligned(16);
+typedef uint8_t sve_ffr_regs_t[SVE_NUM_FFR_REGS * SVE_FFR_REG_LEN_BYTES]
+		__aligned(16);
 
 void sve_config_vq(uint8_t sve_vq);
 uint32_t sve_probe_vl(uint8_t sve_max_vq);
 
 void sve_z_regs_write(const sve_z_regs_t *z_regs);
+void sve_z_regs_write_rand(sve_z_regs_t *z_regs);
 void sve_z_regs_read(sve_z_regs_t *z_regs);
+uint64_t sve_z_regs_compare(const sve_z_regs_t *s1, const sve_z_regs_t *s2);
+
+void sve_p_regs_write(const sve_p_regs_t *p_regs);
+void sve_p_regs_write_rand(sve_p_regs_t *p_regs);
+void sve_p_regs_read(sve_p_regs_t *p_regs);
+uint64_t sve_p_regs_compare(const sve_p_regs_t *s1, const sve_p_regs_t *s2);
+
+void sve_ffr_regs_write(const sve_ffr_regs_t *ffr_regs);
+void sve_ffr_regs_write_rand(sve_ffr_regs_t *ffr_regs);
+void sve_ffr_regs_read(sve_ffr_regs_t *ffr_regs);
+uint64_t sve_ffr_regs_compare(const sve_ffr_regs_t *s1,
+			      const sve_ffr_regs_t *s2);
 
 /* Assembly routines */
 bool sve_subtract_arrays_interleaved(int *dst_array, int *src_array1,
diff --git a/lib/extensions/sve/aarch64/sve.c b/lib/extensions/sve/aarch64/sve.c
index 10e1b3b..b6b4182 100644
--- a/lib/extensions/sve/aarch64/sve.c
+++ b/lib/extensions/sve/aarch64/sve.c
@@ -22,6 +22,7 @@
 	} else {
 		write_zcr_el1(reg_val);
 	}
+
 	isb();
 }
 
@@ -84,10 +85,9 @@
 	return vl_bitmap;
 }
 
+/* Write SVE Z[0-31] registers passed in 'z_regs' */
 void sve_z_regs_write(const sve_z_regs_t *z_regs)
 {
-	assert(is_armv8_2_sve_present());
-
 	__asm__ volatile(
 		".arch_extension sve\n"
 		fill_sve_helper(0)
@@ -126,10 +126,9 @@
 		: : "r" (z_regs));
 }
 
+/* Read SVE Z[0-31] and store it in 'z_regs' */
 void sve_z_regs_read(sve_z_regs_t *z_regs)
 {
-	assert(is_armv8_2_sve_present());
-
 	__asm__ volatile(
 		".arch_extension sve\n"
 		read_sve_helper(0)
@@ -167,3 +166,246 @@
 		".arch_extension nosve\n"
 		: : "r" (z_regs));
 }
+
+/* Write SVE P[0-15] registers from the buffer passed in 'p_regs' */
+void sve_p_regs_write(const sve_p_regs_t *p_regs)
+{
+	__asm__ volatile(
+		".arch_extension sve\n"
+		fill_sve_p_helper(0)
+		fill_sve_p_helper(1)
+		fill_sve_p_helper(2)
+		fill_sve_p_helper(3)
+		fill_sve_p_helper(4)
+		fill_sve_p_helper(5)
+		fill_sve_p_helper(6)
+		fill_sve_p_helper(7)
+		fill_sve_p_helper(8)
+		fill_sve_p_helper(9)
+		fill_sve_p_helper(10)
+		fill_sve_p_helper(11)
+		fill_sve_p_helper(12)
+		fill_sve_p_helper(13)
+		fill_sve_p_helper(14)
+		fill_sve_p_helper(15)
+		".arch_extension nosve\n"
+		: : "r" (p_regs) : "memory");	/* asm reads *p_regs */
+}
+
+/* Read SVE P[0-15] registers and store them in the buffer 'p_regs' */
+void sve_p_regs_read(sve_p_regs_t *p_regs)
+{
+	__asm__ volatile(
+		".arch_extension sve\n"
+		read_sve_p_helper(0)
+		read_sve_p_helper(1)
+		read_sve_p_helper(2)
+		read_sve_p_helper(3)
+		read_sve_p_helper(4)
+		read_sve_p_helper(5)
+		read_sve_p_helper(6)
+		read_sve_p_helper(7)
+		read_sve_p_helper(8)
+		read_sve_p_helper(9)
+		read_sve_p_helper(10)
+		read_sve_p_helper(11)
+		read_sve_p_helper(12)
+		read_sve_p_helper(13)
+		read_sve_p_helper(14)
+		read_sve_p_helper(15)
+		".arch_extension nosve\n"
+		: : "r" (p_regs) : "memory");	/* asm writes *p_regs */
+}
+
+/* Write the SVE FFR register from the value stored in 'ffr_regs' */
+void sve_ffr_regs_write(const sve_ffr_regs_t *ffr_regs)
+{
+	uint8_t sve_p_reg[SVE_P_REG_LEN_BYTES];	/* scratch slot for p0 */
+
+	/* Save p0. Load 'ffr_regs' to p0 and write FFR. Restore p0 */
+	__asm__ volatile(
+		".arch_extension sve\n"
+		"	str	p0, [%1]\n"		/* save p0 in 'sve_p_reg' */
+		"	ldr	p0, [%0]\n"		/* p0 = *ffr_regs */
+		"	wrffr	p0.B\n"			/* FFR = p0 */
+		"	ldr	p0, [%1]\n"		/* restore the saved p0 */
+		".arch_extension nosve\n"
+		:
+		: "r" (ffr_regs), "r" (sve_p_reg)
+		: "memory");	/* asm accesses memory via %0 and %1 */
+}
+
+/* Read the SVE FFR register and store its value in 'ffr_regs' */
+void sve_ffr_regs_read(sve_ffr_regs_t *ffr_regs)
+{
+	uint8_t sve_p_reg[SVE_P_REG_LEN_BYTES];	/* scratch slot for p0 */
+
+	/* Save p0. Read FFR to p0 and save p0 (ffr) to 'ffr_regs'. Restore p0 */
+	__asm__ volatile(
+		".arch_extension sve\n"
+		"	str	p0, [%1]\n"		/* save p0 in 'sve_p_reg' */
+		"	rdffr	p0.B\n"			/* p0 = FFR */
+		"	str	p0, [%0]\n"		/* *ffr_regs = p0 */
+		"	ldr	p0, [%1]\n"		/* restore the saved p0 */
+		".arch_extension nosve\n"
+		:
+		: "r" (ffr_regs), "r" (sve_p_reg)
+		: "memory");	/* asm accesses memory via %0 and %1 */
+}
+
+/*
+ * Fill 'z_regs' with a random byte pattern (the slot of register N is filled
+ * with the low byte of rand() * (N + 1)), then load it into the SVE Z registers.
+ */
+void sve_z_regs_write_rand(sve_z_regs_t *z_regs)
+{
+	uint32_t rval;
+	uint32_t z_size;
+	uint8_t *z_reg;
+
+	z_size = (uint32_t)sve_rdvl_1();
+
+	/* Zero the whole buffer, then fill one z_size-byte slot per register */
+	rval = (uint32_t)rand();
+	memset((void *)z_regs, 0, sizeof(sve_z_regs_t));
+	for (uint32_t i = 0U; i < SVE_NUM_VECTORS; i++) {
+		z_reg = (uint8_t *)z_regs + (i * z_size);
+
+		memset((void *)z_reg, (int)((rval * (i + 1U)) & 0xFFU), z_size);
+	}
+	sve_z_regs_write(z_regs);
+}
+
+/*
+ * Fill 'p_regs' with a random byte pattern (the slot of register N is filled
+ * with the low byte of rand() * (N + 1)), then load it into the SVE P registers.
+ */
+void sve_p_regs_write_rand(sve_p_regs_t *p_regs)
+{
+	uint32_t p_size;
+	uint8_t *p_reg;
+	uint32_t rval;
+
+	p_size = (uint32_t)sve_rdvl_1() / 8U;
+
+	/* Zero the whole buffer, then fill one p_size-byte slot per register */
+	rval = (uint32_t)rand();
+	memset((void *)p_regs, 0, sizeof(sve_p_regs_t));
+	for (uint32_t i = 0U; i < SVE_NUM_P_REGS; i++) {
+		p_reg = (uint8_t *)p_regs + (i * p_size);
+
+		memset((void *)p_reg, (int)((rval * (i + 1U)) & 0xFFU), p_size);
+	}
+	sve_p_regs_write(p_regs);
+}
+
+/*
+ * Fill 'ffr_regs' with a random byte pattern (every byte is the low byte of
+ * rand()), then write it to the SVE FFR register.
+ */
+void sve_ffr_regs_write_rand(sve_ffr_regs_t *ffr_regs)
+{
+	uint32_t ffr_size;
+	uint8_t *ffr_reg;
+	uint32_t rval;
+
+	ffr_size = (uint32_t)sve_rdvl_1() / 8U;
+
+	rval = (uint32_t)rand();
+	memset((void *)ffr_regs, 0, sizeof(sve_ffr_regs_t));
+	for (uint32_t i = 0U; i < SVE_NUM_FFR_REGS; i++) {
+		ffr_reg = (uint8_t *)ffr_regs + (i * ffr_size);
+
+		memset((void *)ffr_reg, (int)((rval * (i + 1U)) & 0xFFU), ffr_size);
+	}
+	sve_ffr_regs_write(ffr_regs);
+}
+
+/*
+ * Compare the Z register images 's1' (old values) and 's2' (new values),
+ * one slot of sve_rdvl_1() bytes per register.
+ *
+ * Returns 0 if all Z[0-31] slots match; otherwise a bitmap in which bit N
+ * is set for every Z register N whose contents differ.
+ */
+uint64_t sve_z_regs_compare(const sve_z_regs_t *s1, const sve_z_regs_t *s2)
+{
+	uint32_t z_size;
+	uint64_t cmp_bitmap = 0UL;
+
+	z_size = (uint32_t)sve_rdvl_1();
+
+	for (uint32_t i = 0U; i < SVE_NUM_VECTORS; i++) {
+		const uint8_t *s1_z = (const uint8_t *)s1 + (i * z_size);
+		const uint8_t *s2_z = (const uint8_t *)s2 + (i * z_size);
+
+		if (memcmp(s1_z, s2_z, z_size) == 0) {
+			continue;
+		}
+
+		cmp_bitmap |= BIT_64(i);
+		VERBOSE("SVE Z_%u mismatch\n", i);
+	}
+
+	return cmp_bitmap;
+}
+
+/*
+ * Compare the P register images 's1' (old values) and 's2' (new values),
+ * one slot of (sve_rdvl_1() / 8) bytes per register.
+ *
+ * Returns 0 if all P[0-15] slots match; otherwise a bitmap in which bit N
+ * is set for every P register N whose contents differ.
+ */
+uint64_t sve_p_regs_compare(const sve_p_regs_t *s1, const sve_p_regs_t *s2)
+{
+	uint32_t p_size;
+	uint64_t cmp_bitmap = 0UL;
+
+	/* Size of one predicate register 1/8 of Z register */
+	p_size = (uint32_t)sve_rdvl_1() / 8U;
+
+	for (uint32_t i = 0U; i < SVE_NUM_P_REGS; i++) {
+		const uint8_t *s1_p = (const uint8_t *)s1 + (i * p_size);
+		const uint8_t *s2_p = (const uint8_t *)s2 + (i * p_size);
+
+		if (memcmp(s1_p, s2_p, p_size) == 0) {
+			continue;
+		}
+
+		cmp_bitmap |= BIT_64(i);
+		VERBOSE("SVE P_%u mismatch\n", i);
+	}
+
+	return cmp_bitmap;
+}
+
+/*
+ * Compare the FFR register images 's1' (old values) and 's2' (new values);
+ * (sve_rdvl_1() / 8) bytes are compared.
+ *
+ * Returns 0 if the FFR images match; otherwise a bitmap with bit 0 set
+ * (SVE_NUM_FFR_REGS is 1, so only one register is compared).
+ */
+uint64_t sve_ffr_regs_compare(const sve_ffr_regs_t *s1, const sve_ffr_regs_t *s2)
+{
+	uint32_t ffr_size;
+	uint64_t cmp_bitmap = 0UL;
+
+	/* Size of one FFR register 1/8 of Z register */
+	ffr_size = (uint32_t)sve_rdvl_1() / 8U;
+
+	for (uint32_t i = 0U; i < SVE_NUM_FFR_REGS; i++) {
+		const uint8_t *s1_ffr = (const uint8_t *)s1 + (i * ffr_size);
+		const uint8_t *s2_ffr = (const uint8_t *)s2 + (i * ffr_size);
+
+		if (memcmp(s1_ffr, s2_ffr, ffr_size) == 0) {
+			continue;
+		}
+
+		cmp_bitmap |= BIT_64(i);
+		VERBOSE("SVE FFR_%u mismatch:\n", i);
+	}
+
+	return cmp_bitmap;
+}
diff --git a/tftf/tests/runtime_services/realm_payload/host_realm_payload_sve_tests.c b/tftf/tests/runtime_services/realm_payload/host_realm_payload_sve_tests.c
index 5e9d4fd..03de96a 100644
--- a/tftf/tests/runtime_services/realm_payload/host_realm_payload_sve_tests.c
+++ b/tftf/tests/runtime_services/realm_payload/host_realm_payload_sve_tests.c
@@ -472,8 +472,8 @@
 test_result_t host_sve_realm_check_vectors_leaked(void)
 {
 	u_register_t rmi_feat_reg0;
-	uint8_t *regs_base_wr, *regs_base_rd;
 	test_result_t rc;
+	uint64_t bitmap;
 	bool realm_rc;
 	uint8_t sve_vq;
 
@@ -520,17 +520,13 @@
 	 *    be either 0 or the old values filled by NS world.
 	 *    TODO: check if upper bits are zero
 	 */
-	regs_base_wr = (uint8_t *)&ns_sve_z_regs_write;
-	regs_base_rd = (uint8_t *)&ns_sve_z_regs_read;
-
-	rc = TEST_RESULT_SUCCESS;
-	for (int i = 0U; i < SVE_NUM_VECTORS; i++) {
-		if (memcmp(regs_base_wr + (i * SVE_VQ_TO_BYTES(sve_vq)),
-			   regs_base_rd + (i * SVE_VQ_TO_BYTES(sve_vq)),
-			   SVE_VQ_TO_BYTES(sve_vq)) != 0) {
-			ERROR("SVE Z%d mismatch\n", i);
-			rc = TEST_RESULT_FAIL;
-		}
+	bitmap = sve_z_regs_compare(&ns_sve_z_regs_write, &ns_sve_z_regs_read);
+	if (bitmap != 0UL) {
+		ERROR("SVE Z regs compare failed (bitmap: 0x%016llx)\n",
+		      bitmap);
+		rc = TEST_RESULT_FAIL;
+	} else {
+		rc = TEST_RESULT_SUCCESS;
 	}
 
 rm_realm:
diff --git a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
index bc3bb52..cfc931f 100644
--- a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
+++ b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
@@ -19,15 +19,6 @@
 
 static const struct ffa_uuid expected_sp_uuids[] = { {PRIMARY_UUID} };
 
-static test_result_t fp_vector_compare(uint8_t *a, uint8_t *b,
-	size_t vector_size, uint8_t vectors_num)
-{
-	if (memcmp(a, b, vector_size * vectors_num) != 0) {
-		return TEST_RESULT_FAIL;
-	}
-	return TEST_RESULT_SUCCESS;
-}
-
 static sve_z_regs_t sve_vectors_input;
 static sve_z_regs_t sve_vectors_output;
 static int sve_op_1[NS_SVE_OP_ARRAYSIZE];
@@ -139,9 +130,11 @@
 	sve_z_regs_read(&sve_vectors_output);
 
 	/* Compare to state before calling into secure world. */
-	return fp_vector_compare((uint8_t *)sve_vectors_input,
-				 (uint8_t *)sve_vectors_output,
-				 vl, SVE_NUM_VECTORS);
+	if (sve_z_regs_compare(&sve_vectors_input, &sve_vectors_output) != 0UL) {
+		return TEST_RESULT_FAIL;
+	}
+
+	return TEST_RESULT_SUCCESS;
 }
 
 /*