refactor(sve): move sve operations to a lib routine

This patch moves the SVE subtract operation to a common SVE library
routine. The routine takes a callback function that performs the world
switch while the SVE operations are done in a loop.

The callback is invoked after the z0 and z1 vectors are loaded and
before the calculated results are stored back in the vector registers.

This refactoring later allows the function to be used for context
switching from the NS world to the Secure world or from the NS world to
the Realm world, depending on the callback type.

This patch also moves the SVE routines that fill and read the vector
registers to the common SVE library.

Signed-off-by: Arunachalam Ganapathy <arunachalam.ganapathy@arm.com>
Change-Id: Iceb34b96fa85597be63a50c429ae0eb29f8fcaf8
diff --git a/tftf/framework/framework.mk b/tftf/framework/framework.mk
index ab9033a..ddae823 100644
--- a/tftf/framework/framework.mk
+++ b/tftf/framework/framework.mk
@@ -88,7 +88,8 @@
 	lib/extensions/sme/aarch64/sme2.c				\
 	lib/extensions/sme/aarch64/sme_helpers.S			\
 	lib/extensions/sme/aarch64/sme2_helpers.S			\
-	lib/extensions/sve/aarch64/sve.c
+	lib/extensions/sve/aarch64/sve.c				\
+	lib/extensions/sve/aarch64/sve_helpers.S
 endif
 
 TFTF_LINKERFILE		:=	tftf/framework/tftf.ld.S
diff --git a/tftf/tests/extensions/sve/sve_operations.S b/tftf/tests/extensions/sve/sve_operations.S
deleted file mode 100644
index e528b2b..0000000
--- a/tftf/tests/extensions/sve/sve_operations.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-
-#include "./test_sve.h"
-
-#ifdef __aarch64__
-#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
-
-/*
- * Based on example code from the Arm Compiler Scalable Vector Extension User
- * Guide[1].
- * [1] https://developer.arm.com/docs/100891/latest/getting-started-with-the-sve-compiler/compiling-c-and-c-code-for-sve-enabled-targets
- */
-
-	.arch armv8.2-a+crc+fp16+sve
-	.global	sve_subtract_arrays
-func sve_subtract_arrays
-	mov	x4, SVE_ARRAYSIZE
-	mov	x5, x4
-	mov	x3, 0
-	whilelo	p0.s, xzr, x4
-.loop:
-	ld1w	z0.s, p0/z, [x1, x3, lsl 2]
-	ld1w	z1.s, p0/z, [x2, x3, lsl 2]
-	sub	z0.s, z0.s, z1.s
-	st1w	z0.s, p0, [x0, x3, lsl 2]
-	incw	x3
-	whilelo	p0.s, x3, x5
-	bne	.loop
-	ret
-endfunc sve_subtract_arrays
-
-#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
-#endif /* __aarch64__ */
diff --git a/tftf/tests/extensions/sve/test_sve.c b/tftf/tests/extensions/sve/test_sve.c
index eabc0de..68ab775 100644
--- a/tftf/tests/extensions/sve/test_sve.c
+++ b/tftf/tests/extensions/sve/test_sve.c
@@ -15,9 +15,6 @@
 
 #if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
 
-extern void sve_subtract_arrays(int *difference, const int *sve_op_1,
-				const int *sve_op_2);
-
 static int sve_difference[SVE_ARRAYSIZE];
 static int sve_op_1[SVE_ARRAYSIZE];
 static int sve_op_2[SVE_ARRAYSIZE];
@@ -43,7 +40,7 @@
 	}
 
 	/* Perform SVE operations */
-	sve_subtract_arrays(sve_difference, sve_op_1, sve_op_2);
+	sve_subtract_arrays(sve_difference, sve_op_1, sve_op_2, SVE_ARRAYSIZE);
 
 	return TEST_RESULT_SUCCESS;
 }
diff --git a/tftf/tests/runtime_services/secure_service/spm_common.c b/tftf/tests/runtime_services/secure_service/spm_common.c
index 60b77b8..0e1c694 100644
--- a/tftf/tests/runtime_services/secure_service/spm_common.c
+++ b/tftf/tests/runtime_services/secure_service/spm_common.c
@@ -103,90 +103,6 @@
 		ret.arg7);
 }
 
-void fill_sve_vector_regs(const sve_vector_t v[SVE_NUM_VECTORS])
-{
-#ifdef __aarch64__
-	__asm__ volatile(
-		".arch_extension sve\n"
-		fill_sve_helper(0)
-		fill_sve_helper(1)
-		fill_sve_helper(2)
-		fill_sve_helper(3)
-		fill_sve_helper(4)
-		fill_sve_helper(5)
-		fill_sve_helper(6)
-		fill_sve_helper(7)
-		fill_sve_helper(8)
-		fill_sve_helper(9)
-		fill_sve_helper(10)
-		fill_sve_helper(11)
-		fill_sve_helper(12)
-		fill_sve_helper(13)
-		fill_sve_helper(14)
-		fill_sve_helper(15)
-		fill_sve_helper(16)
-		fill_sve_helper(17)
-		fill_sve_helper(18)
-		fill_sve_helper(19)
-		fill_sve_helper(20)
-		fill_sve_helper(21)
-		fill_sve_helper(22)
-		fill_sve_helper(23)
-		fill_sve_helper(24)
-		fill_sve_helper(25)
-		fill_sve_helper(26)
-		fill_sve_helper(27)
-		fill_sve_helper(28)
-		fill_sve_helper(29)
-		fill_sve_helper(30)
-		fill_sve_helper(31)
-		".arch_extension nosve\n"
-		: : "r" (v));
-#endif
-}
-
-void read_sve_vector_regs(sve_vector_t v[SVE_NUM_VECTORS])
-{
-#ifdef __aarch64__
-	__asm__ volatile(
-		".arch_extension sve\n"
-		read_sve_helper(0)
-		read_sve_helper(1)
-		read_sve_helper(2)
-		read_sve_helper(3)
-		read_sve_helper(4)
-		read_sve_helper(5)
-		read_sve_helper(6)
-		read_sve_helper(7)
-		read_sve_helper(8)
-		read_sve_helper(9)
-		read_sve_helper(10)
-		read_sve_helper(11)
-		read_sve_helper(12)
-		read_sve_helper(13)
-		read_sve_helper(14)
-		read_sve_helper(15)
-		read_sve_helper(16)
-		read_sve_helper(17)
-		read_sve_helper(18)
-		read_sve_helper(19)
-		read_sve_helper(20)
-		read_sve_helper(21)
-		read_sve_helper(22)
-		read_sve_helper(23)
-		read_sve_helper(24)
-		read_sve_helper(25)
-		read_sve_helper(26)
-		read_sve_helper(27)
-		read_sve_helper(28)
-		read_sve_helper(29)
-		read_sve_helper(30)
-		read_sve_helper(31)
-		".arch_extension nosve\n"
-		: : "r" (v));
-#endif
-}
-
 /*
  * check_spmc_execution_level
  *
diff --git a/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S b/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S
deleted file mode 100644
index f538b2c..0000000
--- a/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2022, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-
-#ifdef __aarch64__
-#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
-
-#define SVE_ARRAYSIZE 1024
-
-/*
- * Based on example code from the Arm Compiler Scalable Vector Extension User
- * Guide[1].
- * [1] https://developer.arm.com/docs/100891/latest/getting-started-with-the-sve-compiler/compiling-c-and-c-code-for-sve-enabled-targets
- */
-
-	.arch armv8.2-a+crc+fp16+sve
-	.global	sve_subtract_interleaved_smc
-func sve_subtract_interleaved_smc
-	mov	x4, SVE_ARRAYSIZE
-	mov	x5, x4
-	mov	x3, 0
-	whilelo	p0.s, xzr, x4
-.loop:
-	ld1w	z0.s, p0/z, [x1, x3, lsl 2]
-	ld1w	z1.s, p0/z, [x2, x3, lsl 2]
-	sub	z0.s, z0.s, z1.s
-	st1w	z0.s, p0, [x0, x3, lsl 2]
-	incw	x3
-
-	stp x0, x1, [sp, #-48]!
-	stp x2, x3, [sp, #16]
-	stp x4, x5, [sp, #32]
-
-	/*
-	 * Forge a FF-A direct request with a command for cactus to fill SIMD
-	 * vectors in the secure world.
-	 */
-	mov w0, #0x6f                   /* FFA_MSG_SEND_DIRECT_REQ_SMC32 */
-	movk w0, #0x8400, lsl #16
-	mov     x1, #0x8001             /* src: nwd, dest: SP1 */
-	mov     x2, xzr
-	mov     x3, #0x4d44
-	movk    w3, #0x5349, lsl #16    /* CACTUS_REQ_SIMD_FILL_CMD */
-	smc     #0
-	and     w1, w0, #0xffff
-	cmp     w1, #0x70               /* FFA_MSG_SEND_DIRECT_RESP_SMC32 (low 16bits) */
-	bne     .			/* Test hangs if direct response not received */
-	cmp	w3, #0x0		/* Check CACTUS_SUCCESS (0x0) returned */
-	bne	.
-	ldp     x4, x5, [sp, #32]
-	ldp     x2, x3, [sp, #16]
-	ldp     x0, x1, [sp], #48
-
-	whilelo	p0.s, x3, x5
-	bne	.loop
-	ret
-endfunc sve_subtract_interleaved_smc
-
-#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
-#endif /* __aarch64__ */
diff --git a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
index 8cb54f7..8f090a2 100644
--- a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
+++ b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
@@ -9,17 +9,15 @@
 #include <ffa_helpers.h>
 #include <fpu.h>
 #include <test_helpers.h>
+#include <lib/extensions/sve.h>
 
 #define SENDER HYP_ID
 #define RECEIVER SP_ID(1)
 #define SVE_TEST_ITERATIONS	100
-#define SVE_ARRAYSIZE		1024
+#define NS_SVE_OP_ARRAYSIZE		1024
 
 static const struct ffa_uuid expected_sp_uuids[] = { {PRIMARY_UUID} };
 
-extern void sve_subtract_interleaved_smc(int *difference, const int *sve_op_1,
-				       const int *sve_op_2);
-
 static test_result_t fp_vector_compare(uint8_t *a, uint8_t *b,
 	size_t vector_size, uint8_t vectors_num)
 {
@@ -31,8 +29,8 @@
 
 static sve_vector_t sve_vectors_input[SVE_NUM_VECTORS] __aligned(16);
 static sve_vector_t sve_vectors_output[SVE_NUM_VECTORS] __aligned(16);
-static int sve_op_1[SVE_ARRAYSIZE];
-static int sve_op_2[SVE_ARRAYSIZE];
+static int sve_op_1[NS_SVE_OP_ARRAYSIZE];
+static int sve_op_2[NS_SVE_OP_ARRAYSIZE];
 static fpu_reg_state_t g_fpu_template;
 
 /*
@@ -114,7 +112,7 @@
 	}
 
 	/* Fill SVE vector registers with the buffer contents prepared above. */
-	fill_sve_vector_regs(sve_vectors_input);
+	sve_fill_vector_regs(sve_vectors_input);
 
 	/*
 	 * Call cactus secure partition which uses SIMD (and expect it doesn't
@@ -131,7 +129,7 @@
 	}
 
 	/* Get the SVE vectors state after returning to normal world. */
-	read_sve_vector_regs(sve_vectors_output);
+	sve_read_vector_regs(sve_vectors_output);
 
 	/* Compare to state before calling into secure world. */
 	return fp_vector_compare((uint8_t *)sve_vectors_input,
@@ -140,12 +138,36 @@
 }
 
 /*
+ * Callback passed to sve_subtract_arrays_interleaved(): sends the SIMD fill
+ * command to the Cactus SP. Only compiled for AArch64 builds.
+ * Returns false on success; true if is_ffa_direct_response() rejects the
+ * reply or cactus_get_response() yields CACTUS_ERROR.
+ */
+#ifdef __aarch64__
+static bool callback_enter_cactus_sp(void)
+{
+	struct ffa_value ret = cactus_req_simd_fill_send_cmd(SENDER, RECEIVER);
+
+	if (!is_ffa_direct_response(ret)) {
+		return true;
+	}
+
+	if (cactus_get_response(ret) == CACTUS_ERROR) {
+		return true;
+	}
+
+	return false;
+}
+#endif /* __aarch64__ */
+
+/*
  * Tests that SVE vector operations in normal world are not affected by context
  * switches between normal world and the secure world.
  */
 test_result_t test_sve_vectors_operations(void)
 {
 	unsigned int val;
+	bool cb_err;
 
 	SKIP_TEST_IF_SVE_NOT_SUPPORTED();
 
@@ -156,7 +178,7 @@
 
 	val = 2 * SVE_TEST_ITERATIONS;
 
-	for (unsigned int i = 0; i < SVE_ARRAYSIZE; i++) {
+	for (unsigned int i = 0; i < NS_SVE_OP_ARRAYSIZE; i++) {
 		sve_op_1[i] = val;
 		sve_op_2[i] = 1;
 	}
@@ -167,11 +189,19 @@
 
 	for (unsigned int i = 0; i < SVE_TEST_ITERATIONS; i++) {
 		/* Perform SVE operations with intermittent calls to Swd. */
-		sve_subtract_interleaved_smc(sve_op_1, sve_op_1, sve_op_2);
+		cb_err = sve_subtract_arrays_interleaved(sve_op_1, sve_op_1,
+							 sve_op_2,
+							 NS_SVE_OP_ARRAYSIZE,
+							 &callback_enter_cactus_sp);
+		if (cb_err == true) {
+			ERROR("Callback to Cactus SP failed\n");
+			return TEST_RESULT_FAIL;
+		}
+
 	}
 
 	/* Check result of SVE operations. */
-	for (unsigned int i = 0; i < SVE_ARRAYSIZE; i++) {
+	for (unsigned int i = 0; i < NS_SVE_OP_ARRAYSIZE; i++) {
 		if (sve_op_1[i] != (val - SVE_TEST_ITERATIONS)) {
 			return TEST_RESULT_FAIL;
 		}
diff --git a/tftf/tests/tests-cpu-extensions.mk b/tftf/tests/tests-cpu-extensions.mk
index f838b4b..0b1839a 100644
--- a/tftf/tests/tests-cpu-extensions.mk
+++ b/tftf/tests/tests-cpu-extensions.mk
@@ -13,7 +13,6 @@
 	extensions/pmuv3/test_pmuv3.c					\
 	extensions/mte/test_mte.c					\
 	extensions/pauth/test_pauth.c					\
-	extensions/sve/sve_operations.S					\
 	extensions/sme/test_sme.c					\
 	extensions/sme/test_sme2.c					\
 	extensions/spe/test_spe.c					\
diff --git a/tftf/tests/tests-spm.mk b/tftf/tests/tests-spm.mk
index 737c4cb..c0a7eb0 100644
--- a/tftf/tests/tests-spm.mk
+++ b/tftf/tests/tests-spm.mk
@@ -27,7 +27,6 @@
 TESTS_SOURCES   +=                                                      \
         $(addprefix tftf/tests/runtime_services/secure_service/,        \
 	  test_spm_cpu_features.c					\
-	  sve_operations_cactus.S					\
 	 )
 
 TESTS_SOURCES	+=							\