feat(sve): manage SVE context

To enable SVE support in Hafnium, save/restore the vector and predicate
registers on context switch between the NWd and SWd.
It is assumed that the SVE vector length is 512 bits. This will be a
platform-specific define in the future.

Signed-off-by: Maksims Svecovs <maksims.svecovs@arm.com>
Change-Id: Ib2833fb0744534a05dbaaa5c836f81c50f1e14db
diff --git a/inc/hf/arch/other_world.h b/inc/hf/arch/other_world.h
index 68c0b45..a82d607 100644
--- a/inc/hf/arch/other_world.h
+++ b/inc/hf/arch/other_world.h
@@ -11,6 +11,15 @@
 #include "hf/ffa.h"
 #include "hf/vm.h"
 
+struct sve_other_world_context_t {
+	uint8_t vectors[32][HF_SVE_VECTOR_LENGTH / 8];
+
+	/* FFR and predicates are one-eighth of the SVE vector length */
+	uint8_t ffr[HF_SVE_VECTOR_LENGTH / 64];
+
+	uint8_t predicates[16][HF_SVE_VECTOR_LENGTH / 64];
+} __attribute__((aligned(16)));
+
 void arch_other_world_init(void);
 bool arch_other_world_vm_init(struct vm *other_world_vm, struct mpool *ppool);
 struct ffa_value arch_other_world_call(struct ffa_value args);
diff --git a/inc/vmapi/hf/types.h b/inc/vmapi/hf/types.h
index 121af9d..179c8ec 100644
--- a/inc/vmapi/hf/types.h
+++ b/inc/vmapi/hf/types.h
@@ -55,6 +55,9 @@
 /** The virtual interrupt ID used for managed exit. */
 #define HF_MANAGED_EXIT_INTID 4
 
+/** Supported SVE vector length, in bits. */
+#define HF_SVE_VECTOR_LENGTH 512
+
 /** Type of interrupts */
 enum interrupt_type {
 	INTERRUPT_TYPE_IRQ,
diff --git a/src/arch/aarch64/exception_macros.S b/src/arch/aarch64/exception_macros.S
index 9a9336c..8ed785a 100644
--- a/src/arch/aarch64/exception_macros.S
+++ b/src/arch/aarch64/exception_macros.S
@@ -58,6 +58,90 @@
 .endm
 
 /**
+ * Helper macro for SIMD vectors save/restore operations.
+ */
+.macro simd_op_vectors op:req reg:req
+	\op q0, q1, [\reg], #32
+	\op q2, q3, [\reg], #32
+	\op q4, q5, [\reg], #32
+	\op q6, q7, [\reg], #32
+	\op q8, q9, [\reg], #32
+	\op q10, q11, [\reg], #32
+	\op q12, q13, [\reg], #32
+	\op q14, q15, [\reg], #32
+	\op q16, q17, [\reg], #32
+	\op q18, q19, [\reg], #32
+	\op q20, q21, [\reg], #32
+	\op q22, q23, [\reg], #32
+	\op q24, q25, [\reg], #32
+	\op q26, q27, [\reg], #32
+	\op q28, q29, [\reg], #32
+	\op q30, q31, [\reg], #32
+.endm
+
+/**
+ * Helper macro for SVE vectors save/restore operations.
+ */
+.macro sve_op_vectors op:req reg:req
+.arch_extension sve
+	\op z0, [\reg, #0, MUL VL]
+	\op z1, [\reg, #1, MUL VL]
+	\op z2, [\reg, #2, MUL VL]
+	\op z3, [\reg, #3, MUL VL]
+	\op z4, [\reg, #4, MUL VL]
+	\op z5, [\reg, #5, MUL VL]
+	\op z6, [\reg, #6, MUL VL]
+	\op z7, [\reg, #7, MUL VL]
+	\op z8, [\reg, #8, MUL VL]
+	\op z9, [\reg, #9, MUL VL]
+	\op z10, [\reg, #10, MUL VL]
+	\op z11, [\reg, #11, MUL VL]
+	\op z12, [\reg, #12, MUL VL]
+	\op z13, [\reg, #13, MUL VL]
+	\op z14, [\reg, #14, MUL VL]
+	\op z15, [\reg, #15, MUL VL]
+	\op z16, [\reg, #16, MUL VL]
+	\op z17, [\reg, #17, MUL VL]
+	\op z18, [\reg, #18, MUL VL]
+	\op z19, [\reg, #19, MUL VL]
+	\op z20, [\reg, #20, MUL VL]
+	\op z21, [\reg, #21, MUL VL]
+	\op z22, [\reg, #22, MUL VL]
+	\op z23, [\reg, #23, MUL VL]
+	\op z24, [\reg, #24, MUL VL]
+	\op z25, [\reg, #25, MUL VL]
+	\op z26, [\reg, #26, MUL VL]
+	\op z27, [\reg, #27, MUL VL]
+	\op z28, [\reg, #28, MUL VL]
+	\op z29, [\reg, #29, MUL VL]
+	\op z30, [\reg, #30, MUL VL]
+	\op z31, [\reg, #31, MUL VL]
+
+.endm
+
+/**
+ * Helper macro for SVE predicates save/restore operations.
+ */
+.macro sve_predicate_op op:req reg:req
+	\op p0, [\reg, #0, MUL VL]
+	\op p1, [\reg, #1, MUL VL]
+	\op p2, [\reg, #2, MUL VL]
+	\op p3, [\reg, #3, MUL VL]
+	\op p4, [\reg, #4, MUL VL]
+	\op p5, [\reg, #5, MUL VL]
+	\op p6, [\reg, #6, MUL VL]
+	\op p7, [\reg, #7, MUL VL]
+	\op p8, [\reg, #8, MUL VL]
+	\op p9, [\reg, #9, MUL VL]
+	\op p10, [\reg, #10, MUL VL]
+	\op p11, [\reg, #11, MUL VL]
+	\op p12, [\reg, #12, MUL VL]
+	\op p13, [\reg, #13, MUL VL]
+	\op p14, [\reg, #14, MUL VL]
+	\op p15, [\reg, #15, MUL VL]
+.endm
+
+/**
  * Restores the volatile registers from the stack. This currently takes 14
  * instructions, so it can be used in exception handlers while still leaving 18
  * instructions left; if paired with save_volatile_to_stack, there are 4
diff --git a/src/arch/aarch64/hypervisor/exceptions.S b/src/arch/aarch64/hypervisor/exceptions.S
index 54b0deb..4be3380 100644
--- a/src/arch/aarch64/hypervisor/exceptions.S
+++ b/src/arch/aarch64/hypervisor/exceptions.S
@@ -13,6 +13,13 @@
 #include "msr.h"
 #include "exception_macros.S"
 
+
+/**
+ * PE feature information about SVE implementation in AArch64 state.
+ */
+#define ID_AA64PFR0_SVE_SHIFT (32)
+#define ID_AA64PFR0_SVE_LENGTH (4)
+
 /**
  * Saves the volatile registers into the register buffer of the current vCPU.
  */
@@ -106,28 +113,6 @@
 .endm
 
 /**
- * Helper macro for SIMD vectors save/restore operations.
- */
-.macro simd_op_vectors op reg
-	\op q0, q1, [\reg], #32
-	\op q2, q3, [\reg], #32
-	\op q4, q5, [\reg], #32
-	\op q6, q7, [\reg], #32
-	\op q8, q9, [\reg], #32
-	\op q10, q11, [\reg], #32
-	\op q12, q13, [\reg], #32
-	\op q14, q15, [\reg], #32
-	\op q16, q17, [\reg], #32
-	\op q18, q19, [\reg], #32
-	\op q20, q21, [\reg], #32
-	\op q22, q23, [\reg], #32
-	\op q24, q25, [\reg], #32
-	\op q26, q27, [\reg], #32
-	\op q28, q29, [\reg], #32
-	\op q30, q31, [\reg], #32
-.endm
-
-/**
  * The following is the exception table. A pointer to it will be stored in
  * register vbar_el2.
  */
@@ -396,9 +381,10 @@
 	 */
 
 other_world_loop:
-	/*
-	 * x19 holds the other world VM vCPU pointer.
-	 */
+	/* Check if SVE is implemented. */
+	mrs x0, id_aa64pfr0_el1
+	ubfx x0, x0, ID_AA64PFR0_SVE_SHIFT, ID_AA64PFR0_SVE_LENGTH
+	cbnz x0, sve_context_restore
 
 	/* Restore the other world SIMD context to the other world VM vCPU. */
 	add x18, x19, #VCPU_FREGS
@@ -406,8 +392,32 @@
 	ldp x0, x1, [x18]
 	msr fpsr, x0
 	msr fpcr, x1
+	b sve_skip_context_restore
 
-	/* Prepare arguments from other world VM vCPU. */
+	/* Restore the other world SVE context from internal buffer. */
+sve_context_restore:
+	adrp x18, sve_other_world_context
+	add x18, x18, :lo12: sve_other_world_context
+	ldr x0, [x19, #VCPU_CPU]
+	bl cpu_index
+	mov x20, #SVE_CTX_SIZE
+	madd x18, x0, x20, x18
+
+	/* Restore vector registers. */
+	sve_op_vectors ldr, x18
+	/* Restore FFR register before predicates. */
+	add x20, x18, #SVE_CTX_FFR
+	ldr p0, [x20]
+	wrffr p0.b
+	/* Restore predicate registers. */
+	add x20, x18, #SVE_CTX_PREDICATES
+	sve_predicate_op ldr, x20
+
+	/*
+	 * Prepare arguments from other world VM vCPU.
+	 * x19 holds the other world VM vCPU pointer.
+	 */
+sve_skip_context_restore:
 	ldp x0, x1, [x19, #VCPU_REGS + 8 * 0]
 	ldp x2, x3, [x19, #VCPU_REGS + 8 * 2]
 	ldp x4, x5, [x19, #VCPU_REGS + 8 * 4]
@@ -443,12 +453,39 @@
 	stp x4, x5, [x19, #VCPU_REGS + 8 * 4]
 	stp x6, x7, [x19, #VCPU_REGS + 8 * 6]
 
+	/* Check if SVE is implemented. */
+	mrs x0, id_aa64pfr0_el1
+	ubfx x0, x0, ID_AA64PFR0_SVE_SHIFT, ID_AA64PFR0_SVE_LENGTH
+	cbnz x0, sve_context_save
+
 	/* Save the other world SIMD context to the other world VM vCPU. */
 	add x18, x19, #VCPU_FREGS
 	simd_op_vectors stp, x18
 	mrs x0, fpsr
 	mrs x1, fpcr
 	stp x0, x1, [x18]
+	b sve_skip_context_save
+
+	/* Save the other world SVE context to internal buffer. */
+sve_context_save:
+	adrp x18, sve_other_world_context
+	add x18, x18, :lo12: sve_other_world_context
+	ldr x0, [x19, #VCPU_CPU]
+	bl cpu_index
+	mov x20, #SVE_CTX_SIZE
+	madd x18, x0, x20, x18
+
+	/* Save vector registers. */
+	sve_op_vectors str, x18
+	/* Save predicate registers. */
+	add x20, x18, #SVE_CTX_PREDICATES
+	sve_predicate_op str, x20
+	/* Save FFR register after predicates. */
+	add x20, x18, #SVE_CTX_FFR
+	rdffr p0.b
+	str p0, [x20]
+
+sve_skip_context_save:
 
 #if BRANCH_PROTECTION
 	pauth_restore_hypervisor_key x0 x1
diff --git a/src/arch/aarch64/hypervisor/offsets.c b/src/arch/aarch64/hypervisor/offsets.c
index 1d9c60f..f795bd9 100644
--- a/src/arch/aarch64/hypervisor/offsets.c
+++ b/src/arch/aarch64/hypervisor/offsets.c
@@ -6,6 +6,8 @@
  * https://opensource.org/licenses/BSD-3-Clause.
  */
 
+#include "hf/arch/other_world.h"
+
 #include "hf/cpu.h"
 #include "hf/offset_size_header.h"
 #include "hf/vm.h"
@@ -15,6 +17,7 @@
 DEFINE_OFFSETOF(CPU_ID, struct cpu, id)
 DEFINE_OFFSETOF(CPU_STACK_BOTTOM, struct cpu, stack_bottom)
 DEFINE_OFFSETOF(VCPU_VM, struct vcpu, vm)
+DEFINE_OFFSETOF(VCPU_CPU, struct vcpu, cpu)
 DEFINE_OFFSETOF(VCPU_REGS, struct vcpu, regs)
 DEFINE_OFFSETOF(VCPU_LAZY, struct vcpu, regs.lazy)
 DEFINE_OFFSETOF(VCPU_FREGS, struct vcpu, regs.fp)
@@ -27,3 +30,8 @@
 #if GIC_VERSION == 3 || GIC_VERSION == 4
 DEFINE_OFFSETOF(VCPU_GIC, struct vcpu, regs.gic)
 #endif
+
+DEFINE_SIZEOF(SVE_CTX_SIZE, struct sve_other_world_context_t)
+DEFINE_OFFSETOF(SVE_CTX_FFR, struct sve_other_world_context_t, ffr)
+DEFINE_OFFSETOF(SVE_CTX_PREDICATES, struct sve_other_world_context_t,
+		predicates)
diff --git a/src/arch/aarch64/hypervisor/other_world.c b/src/arch/aarch64/hypervisor/other_world.c
index e43d89a..ebe94ca 100644
--- a/src/arch/aarch64/hypervisor/other_world.c
+++ b/src/arch/aarch64/hypervisor/other_world.c
@@ -25,6 +25,12 @@
 alignas(PAGE_SIZE) static uint8_t other_world_recv_buffer[HF_MAILBOX_SIZE];
 
 #endif
+#if SECURE_WORLD == 1
+
+/** Other world SVE context (accessed from other_world_loop). */
+struct sve_other_world_context_t sve_other_world_context[MAX_CPUS];
+
+#endif
 
 void arch_other_world_init(void)
 {