SPMC: complete vCPU state save prior to normal world exit

This change addresses the generic SPMC problem mentioned in [1].
The current implementation leaves the live vCPU in a stale state prior
to leaving to the normal world. EL3/SPMD fortunately backs up both EL1
and EL2 although it looks cleaner for the SPMC to fully own saving of
the state. The other world loop is moved to after the vcpu_switch save
routine to permit unwinding the stack down to emitting SMC. The other
world arguments are held in the other world VM vCPU representing the
normal world. On returning from normal world, execution restarts after
the SMC and arguments are copied to the restored other world VM vCPU.
This solves problems like losing the state of the last SP when exiting
at boot time or upon a managed exit, and UP SP vCPU migration.

[1] https://git.trustedfirmware.org/hafnium/hafnium.git/
tree/src/arch/aarch64/hypervisor/handler.c?h=v2.4#n590

Change-Id: If3d4dd332233330a10cf2b66ce9c85480dff3793
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/inc/hf/vcpu.h b/inc/hf/vcpu.h
index 2f9ffb6..ea277c0 100644
--- a/inc/hf/vcpu.h
+++ b/inc/hf/vcpu.h
@@ -74,7 +74,10 @@
 	/*
 	 * Determine whether the 'regs' field is available for use. This is set
 	 * to false when a vCPU is about to run on a physical CPU, and is set
-	 * back to true when it is descheduled.
+	 * back to true when it is descheduled. This is not relevant for the
+	 * primary VM vCPUs in the normal world (or the "other world VM" vCPUs
+	 * in the secure world) as they are pinned to physical CPUs and there
+	 * is no contention to take care of.
 	 */
 	bool regs_available;
 
@@ -112,5 +115,4 @@
 bool vcpu_handle_page_fault(const struct vcpu *current,
 			    struct vcpu_fault_info *f);
 
-struct vcpu *vcpu_get_other_world_counterpart(struct vcpu *current);
 void vcpu_reset(struct vcpu *vcpu);
diff --git a/src/arch/aarch64/hypervisor/exceptions.S b/src/arch/aarch64/hypervisor/exceptions.S
index 9e279c2..0bc59c0 100644
--- a/src/arch/aarch64/hypervisor/exceptions.S
+++ b/src/arch/aarch64/hypervisor/exceptions.S
@@ -7,6 +7,9 @@
  */
 
 #include "hf/arch/offsets.h"
+
+#include "hf/arch/vmid_base.h"
+
 #include "msr.h"
 #include "exception_macros.S"
 
@@ -324,6 +327,60 @@
 	bl complete_saving_state
 	mov x0, x19
 
+#if SECURE_WORLD == 1
+
+	ldr x1, [x0, #VCPU_VM]
+	ldrh w1, [x1, #VM_ID]
+
+	/* Exit to normal world if VM is HF_OTHER_WORLD_ID. */
+	cmp w1, #HF_OTHER_WORLD_ID
+	bne vcpu_restore_all_and_run
+
+	/*
+	 * The current vCPU state is saved so it's now safe to switch to the
+	 * normal world.
+	 */
+
+other_world_loop:
+	/*
+	 * Prepare arguments from other world VM vCPU.
+	 * x19 holds the other world VM vCPU pointer.
+	 */
+	ldp x0, x1, [x19, #VCPU_REGS + 8 * 0]
+	ldp x2, x3, [x19, #VCPU_REGS + 8 * 2]
+	ldp x4, x5, [x19, #VCPU_REGS + 8 * 4]
+	ldp x6, x7, [x19, #VCPU_REGS + 8 * 6]
+
+	smc #0
+
+	/*
+	 * The call to EL3 returned, First eight GP registers contain an FF-A
+	 * call from the physical FF-A instance. Save those arguments to the
+	 * other world VM vCPU.
+	 * x19 is restored with the other world VM vCPU pointer.
+	 */
+	stp x0, x1, [x19, #VCPU_REGS + 8 * 0]
+	stp x2, x3, [x19, #VCPU_REGS + 8 * 2]
+	stp x4, x5, [x19, #VCPU_REGS + 8 * 4]
+	stp x6, x7, [x19, #VCPU_REGS + 8 * 6]
+
+	/*
+	 * Stack is at top and execution can restart straight into C code.
+	 * Handle the FF-A call from other world.
+	 */
+	mov x0, x19
+	bl smc_handler_from_nwd
+
+	/*
+	 * If the smc handler returns null this indicates no vCPU has to be
+	 * resumed and GP registers contain a fresh FF-A response or call
+	 * directed to the normal world. Hence loop back and emit SMC again.
+	 * Otherwise restore the vCPU pointed to by the handler return value.
+	 */
+	cbz x0, other_world_loop
+
+#endif
+
 	/* Intentional fallthrough. */
 .global vcpu_restore_all_and_run
 vcpu_restore_all_and_run:
diff --git a/src/arch/aarch64/hypervisor/handler.c b/src/arch/aarch64/hypervisor/handler.c
index b33298c..1552e17 100644
--- a/src/arch/aarch64/hypervisor/handler.c
+++ b/src/arch/aarch64/hypervisor/handler.c
@@ -11,7 +11,6 @@
 #include "hf/arch/barriers.h"
 #include "hf/arch/init.h"
 #include "hf/arch/mmu.h"
-#include "hf/arch/other_world.h"
 #include "hf/arch/plat/smc.h"
 
 #include "hf/api.h"
@@ -476,63 +475,6 @@
 	return false;
 }
 
-#if SECURE_WORLD == 1
-
-/**
- * Called to switch to the other world and handle FF-A calls from it. Returns
- * when it is ready to run a secure partition again.
- *
- * Expects that `other_world_vcpu` points to the vCPU of the 'other world VM'
- * which corresponds to this physical CPU, and that `*next` is `NULL`.
- */
-static void other_world_switch_loop(struct vcpu *other_world_vcpu,
-				    struct vcpu **next)
-{
-	struct ffa_value other_world_args =
-		arch_regs_get_args(&other_world_vcpu->regs);
-
-	CHECK(!vm_id_is_current_world(other_world_vcpu->vm->id));
-	CHECK(*next == NULL);
-
-	while (*next == NULL) {
-		/*
-		 * Either we just entered the function or the last FF-A call
-		 * from the other world was handled and next is still NULL,
-		 * which means that the result should be passed back to the
-		 * other world.
-		 */
-		other_world_args = smc_ffa_call(other_world_args);
-
-		/*
-		 * The call to EL3 returned, thus other_world_args contains an
-		 * FF-A call from the physical FF-A instance. Handle it. At this
-		 * point *next is still NULL, which means that we will return
-		 * the result of the call back to EL3 unless the API handler
-		 * sets *next to something different.
-		 */
-		if (!ffa_handler(&other_world_args, other_world_vcpu, next)) {
-			other_world_args.func = SMCCC_ERROR_UNKNOWN;
-		}
-	}
-
-	/*
-	 * ffa_handler set *next to something, which means it wants to switch
-	 * back to an SP in EL1. It must be something in this world though, as
-	 * if it wanted to return back to the other world (where the last FF-A
-	 * call came from) it wouldn't have set *next at all.
-	 */
-	CHECK(vm_id_is_current_world((*next)->vm->id));
-
-	/*
-	 * Store the return value on the other world vCPU, ready for next time
-	 * we switch to it (in case they aren't overwritten at that point by
-	 * whatever API function decides to make the switch).
-	 */
-	arch_regs_set_retval(&other_world_vcpu->regs, other_world_args);
-}
-
-#endif
-
 /**
  * Set or clear VI bit according to pending interrupts.
  */
@@ -549,9 +491,7 @@
 		set_virtual_interrupt_current(
 			vcpu->interrupts.enabled_and_pending_count > 0);
 		sl_unlock(&vcpu->lock);
-	} else {
-		CHECK(vm_id_is_current_world(next->vm->id));
-
+	} else if (vm_id_is_current_world(next->vm->id)) {
 		/*
 		 * About to switch vCPUs, set the bit for the vCPU to which we
 		 * are switching in the saved copy of the register.
@@ -573,45 +513,16 @@
 static bool hvc_smc_handler(struct ffa_value args, struct vcpu *vcpu,
 			    struct vcpu **next)
 {
+	/* Do not expect PSCI calls emitted from within the secure world. */
+#if SECURE_WORLD == 0
 	if (psci_handler(vcpu, args.func, args.arg1, args.arg2, args.arg3,
 			 &vcpu->regs.r[0], next)) {
 		return true;
 	}
+#endif
 
 	if (ffa_handler(&args, vcpu, next)) {
 		arch_regs_set_retval(&vcpu->regs, args);
-
-#if SECURE_WORLD == 1
-		struct vcpu *other_world_vcpu =
-			vcpu_get_other_world_counterpart(current());
-
-		if (*next == other_world_vcpu) {
-			/*
-			 * TODO: Save non-volatile registers of current SP vCPU
-			 * before switching to normal world.
-			 * For now we rely on the SPMD in TF-A to do this at
-			 * EL3:
-			 * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/tree/services/std_svc/spmd/spmd_main.c#n121
-			 * This requires that the CTX_INCLUDE_FP_REGS option is
-			 * enabled in the TF-A build.
-			 */
-			api_regs_state_saved(vcpu);
-
-			*next = NULL;
-			other_world_switch_loop(other_world_vcpu, next);
-
-			/*
-			 * Whatever API function has requested a switch back to
-			 * the vCPU of an SP should have set regs_available to
-			 * false already. It must also have explicitly set a
-			 * next vCPU rather than leaving it NULL, because NULL
-			 * would have meant returning to the normal world.
-			 */
-			CHECK(next != NULL);
-			CHECK(!(*next)->regs_available);
-		}
-#endif
-
 		update_vi(*next);
 		return true;
 	}
@@ -642,6 +553,35 @@
 	return NULL;
 }
 
+#if SECURE_WORLD == 1
+
+/**
+ * Called from other_world_loop return from SMC.
+ * Processes SMC calls originating from the NWd.
+ */
+struct vcpu *smc_handler_from_nwd(struct vcpu *vcpu)
+{
+	struct ffa_value args = arch_regs_get_args(&vcpu->regs);
+	struct vcpu *next = NULL;
+
+	if (hvc_smc_handler(args, vcpu, &next)) {
+		return next;
+	}
+
+	/*
+	 * If the SMC emitted by the normal world is not handled in the secure
+	 * world then return an error stating such ABI is not supported. Only
+	 * FF-A calls are supported. We cannot return SMCCC_ERROR_UNKNOWN
+	 * directly because the SPMD smc handler would not recognize it as a
+	 * standard FF-A call returning from the SPMC.
+	 */
+	arch_regs_set_retval(&vcpu->regs, ffa_error(FFA_NOT_SUPPORTED));
+
+	return NULL;
+}
+
+#endif
+
 /*
  * Exception vector offsets.
  * See Arm Architecture Reference Manual Armv8-A, D1.10.2.
diff --git a/src/arch/aarch64/hypervisor/offsets.c b/src/arch/aarch64/hypervisor/offsets.c
index b306092..6b9275d 100644
--- a/src/arch/aarch64/hypervisor/offsets.c
+++ b/src/arch/aarch64/hypervisor/offsets.c
@@ -8,13 +8,17 @@
 
 #include "hf/cpu.h"
 #include "hf/offset_size_header.h"
+#include "hf/vm.h"
 
 DEFINE_OFFSETOF(CPU_ID, struct cpu, id)
 DEFINE_OFFSETOF(CPU_STACK_BOTTOM, struct cpu, stack_bottom)
+DEFINE_OFFSETOF(VCPU_VM, struct vcpu, vm)
 DEFINE_OFFSETOF(VCPU_REGS, struct vcpu, regs)
 DEFINE_OFFSETOF(VCPU_LAZY, struct vcpu, regs.lazy)
 DEFINE_OFFSETOF(VCPU_FREGS, struct vcpu, regs.fp)
 
+DEFINE_OFFSETOF(VM_ID, struct vm, id)
+
 #if GIC_VERSION == 3 || GIC_VERSION == 4
 DEFINE_OFFSETOF(VCPU_GIC, struct vcpu, regs.gic)
 #endif
diff --git a/src/vcpu.c b/src/vcpu.c
index 6b1e161..7fd2e75 100644
--- a/src/vcpu.c
+++ b/src/vcpu.c
@@ -184,18 +184,6 @@
 	return resume;
 }
 
-/**
- * Return the vCPU corresponding to the other world vCPU whose index
- * matches current vCPU index.
- */
-struct vcpu *vcpu_get_other_world_counterpart(struct vcpu *current)
-{
-	struct vm *vm = vm_find(HF_OTHER_WORLD_ID);
-	ffa_vcpu_index_t current_cpu_index = cpu_index(current->cpu);
-
-	return vm_get_vcpu(vm, current_cpu_index);
-}
-
 void vcpu_reset(struct vcpu *vcpu)
 {
 	arch_cpu_init(vcpu->cpu, ipa_init(0));